Kagawa, H., Javali, A., Khoei, H.H., Sommer, T.M., Sestini, G., Novatchkova, M., Scholte Op Reimer, Y., Castel, G., Bruneau, A., Maenhoudt, N., et al. (2021). Human blastoids model blastocyst development and implantation. Nature 1–9.
Load required packages.
library(tidyverse)
library(Matrix)
library(patchwork)
library(extrafont)
Sys.time()
## [1] "2022-01-18 00:33:31 CST"
# Load the project's shared helpers from utilities.R; SCRIPT_DIR must already
# be defined by the calling environment.
source(file.path(SCRIPT_DIR, "utilities.R"))
#' Highlight every group of a metadata column on an embedding
#'
#' Draws one panel per group of `x[[y]]`. In each panel, cells of that group
#' are salmon, other cells found in `x` are light grey, and cells of
#' `embedding` that have no match in `x` are dark grey. The number of
#' highlighted cells is shown in the panel title and in the top-right corner.
#'
#' Coordinates and point styling are read from the global variables
#' `x_column`, `y_column`, `GEOM_POINT_SIZE` and `RASTERISED`, which must be
#' defined before the function is called.
#'
#' @param embedding Data frame with one row per cell, containing the
#'   coordinate columns and the key column(s) shared with `x`.
#' @param x Cell metadata; joined onto `embedding` by all shared columns.
#' @param y Name (single string) of the column of `x` to highlight. Factor
#'   columns are iterated in level order; other columns in sorted order of
#'   their unique non-missing values.
#' @param label Title prefix of every panel.
#' @param n_cols Number of columns of the patchwork layout.
#' @return A patchwork object with one panel per group.
plot_embedding_highlight <- function(embedding, x, y, label, n_cols = 3) {
  cell_metadata_selected <- x
  selected_column <- y

  stopifnot(
    is.character(selected_column),
    length(selected_column) == 1L,
    selected_column %in% colnames(cell_metadata_selected)
  )

  # levels() is NULL for non-factor columns, which used to end in an opaque
  # "empty reduce" error; fall back to the sorted unique values instead.
  group_values <- cell_metadata_selected[[selected_column]]
  group_levels <- if (is.factor(group_values)) {
    levels(group_values)
  } else {
    sort(unique(group_values))
  }
  if (length(group_levels) == 0L) {
    stop(
      "Column `", selected_column, "` has no groups to highlight.",
      call. = FALSE
    )
  }

  # The join does not depend on the highlighted group, so do it once.
  joined <- dplyr::left_join(embedding, cell_metadata_selected)
  if (nrow(joined) != nrow(embedding)) {
    stop(
      "Joining the metadata changed the number of rows of `embedding`; ",
      "the metadata must have at most one row per cell.",
      call. = FALSE
    )
  }
  group_per_cell <- joined[[selected_column]]

  panels <- purrr::map(group_levels, \(level) {
    # TRUE: highlighted; FALSE: other cell of this dataset; NA: not in `x`.
    is_highlighted <- group_per_cell == level
    num_highlighted <- sum(is_highlighted, na.rm = TRUE)
    values <- as.factor(as.integer(is_highlighted))

    plot_embedding(
      embedding = embedding[, c(x_column, y_column)],
      color_values = values,
      label = glue::glue("{label}; ", "{level}: {num_highlighted}"),
      label_position = NULL,
      show_color_value_labels = FALSE,
      show_color_legend = FALSE,
      geom_point_size = GEOM_POINT_SIZE,
      sort_values = TRUE,
      shuffle_values = FALSE,
      rasterise = RASTERISED,
      legend_size = 2
    ) +
      theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
      ) +
      # Named values keep "1" salmon even when no "0" cell is present.
      ggplot2::scale_color_manual(
        na.translate = TRUE,
        values = c("0" = "grey70", "1" = "salmon"),
        na.value = "#7F7F7F"
      ) +
      ggplot2::annotate(
        geom = "text",
        x = Inf,
        y = Inf,
        label = num_highlighted,
        size = 5 / ggplot2::.pt,
        hjust = 1,
        vjust = 1,
        na.rm = FALSE
      )
  })

  panels |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = n_cols) +
    patchwork::plot_annotation(
      theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )
}
# Root directory of the project data; every input path below is built from it.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation"
Raw fastq files of these datasets were downloaded and re-processed as described in Yu et al. 2021 to minimize platform and processing differences.
# Import the Python `anndata` module through reticulate (with automatic
# conversion of Python objects to R) and report its version.
ad <- reticulate::import(module = "anndata", convert = TRUE)
print(ad$`__version__`)
## [1] "0.7.6"
# One adata.h5ad path per BioProject, kept as a list in the order given here.
adata_files <- lapply(
  c("PRJNA737139", "PRJEB11202", "PRJNA431392", "PRJEB40781"),
  function(bioproject) {
    file.path(PROJECT_DIR, "raw", "public", bioproject, "matrix", "adata.h5ad")
  }
)
# Check that every file is present before loading.
vapply(adata_files, file.exists, logical(1))
## [1] TRUE TRUE TRUE TRUE
# Read every dataset (not file-backed), convert it with convert_adata(), and
# bind the per-dataset matrices column-wise into one matrix.
BACKED <- NULL
matrix_readcount_use <- Reduce(
  cbind,
  lapply(adata_files, function(path) {
    adata <- ad$read_h5ad(filename = path, backed = BACKED)
    convert_adata(adata)
  })
)
dim(matrix_readcount_use)
## [1] 33538 10702
# Re-open every dataset in backed ("r") mode and stack the per-cell `obs`
# tables; the row names become the leading `cell` column.
BACKED <- "r"
cell_metadata <- dplyr::bind_rows(
  lapply(adata_files, function(path) {
    obs <- ad$read_h5ad(filename = path, backed = BACKED)$obs
    obs <- tibble::rownames_to_column(obs, var = "cell")
    dplyr::select(obs, cell, everything())
  })
)
head(cell_metadata)
Check memory usage.
# Report the in-memory size of the two main objects in SI units.
for (object in list(matrix_readcount_use, cell_metadata)) {
  print(object.size(object), units = "auto", standard = "SI")
}
## 922.6 MB
## 1.1 MB
# Curated metadata of the Kagawa et al. dataset (PRJNA737139).
cell_metadata_PRJNA737139 <- vroom::vroom(
  file.path(
    PROJECT_DIR, "raw", "public", "PRJNA737139", "matrix/cell_metadata.csv"
  )
)
# Fix the lineage order explicitly; the source name keeps the default
# (sorted) level order.
cell_metadata_PRJNA737139 <- dplyr::mutate(
  cell_metadata_PRJNA737139,
  lineage = factor(
    lineage,
    levels = c(
      "Naive H9", "Primed H9", "TSCs",
      "Blastoid_24h", "Blastoid_60h", "Blastoid_96h"
    )
  ),
  source_name = factor(source_name)
)
head(cell_metadata_PRJNA737139)
The expression matrix was re-generated from raw fastq files. To make this analysis comparable with the original publication, we retrieved the cell ids from the deposited dataset.
Retrieve the cell metadata from Gene Expression Omnibus.
# Download the authors' unfiltered cell metadata table (GSE177689 supplementary
# file) directly from the GEO FTP server; requires network access.
cell_metadata_PRJNA737139_author <- vroom::vroom(
file = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE177nnn/GSE177689/suppl/GSE177689_blastoid.H9.okae_bts5__scRNAseq.unfiltered__metadata.tsv.gz"
)
## Rows: 2715 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): samplename, treatment
## dbl (7): sampleid, nFeature_RNA, percent.mt, blastoid.Fig2b.lowres, blastoid...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the authors' metadata.
head(cell_metadata_PRJNA737139_author)
# Cells with a value in `blastoid.Fig2b.lowres` are counted as used in Fig. 2b.
num_cells_used_in_Fig2b <- sum(
  !is.na(cell_metadata_PRJNA737139_author$blastoid.Fig2b.lowres)
)
print(
  glue::glue("Total number of cells deposited: {nrow(cell_metadata_PRJNA737139_author)}"),
  glue::glue("Number of cells used in Fig. 2b: {num_cells_used_in_Fig2b}")
)
## Total number of cells deposited: 2715
## Number of cells used in Fig. 2b: 2067
# Blastoid cells are those whose treatment starts with "PALLY". A missing
# treatment counts as "not blastoid", as dplyr::filter() would treat it.
num_cells_deposited_blastoid <- sum(
  stringr::str_starts(
    string = cell_metadata_PRJNA737139_author$treatment,
    pattern = "PALLY"
  ),
  na.rm = TRUE
)
num_cells_used_in_Fig2b_blastoid <- sum(
  stringr::str_starts(
    string = cell_metadata_PRJNA737139_author$treatment,
    pattern = "PALLY"
  ) &
    !is.na(cell_metadata_PRJNA737139_author$blastoid.Fig2b.lowres),
  na.rm = TRUE
)
print(
  glue::glue("Total number of blastoid cells deposited: {num_cells_deposited_blastoid}"),
  glue::glue("Number of blastoid cells used in Fig. 2b: {num_cells_used_in_Fig2b_blastoid}")
)
## Total number of blastoid cells deposited: 2311
## Number of blastoid cells used in Fig. 2b: 1672
2067 out of the 2715 deposited cells (76.1%), and 1672 out of the 2311 deposited blastoid cells (72.3%), are used in Fig. 2a-d.
# Precomputed embedding of the 2067 cells of PRJNA737139 (Scanpy + Harmony).
EMBEDDING_FILE <- glue::glue("embedding_ncomponents10_ccc1_seed20210719.csv.gz")
embedding_2067 <- vroom::vroom(
  file.path(
    PROJECT_DIR, "raw", "public", "PRJNA737139",
    "clustering", "PRJNA737139", "exploring", "Scanpy_Harmony",
    EMBEDDING_FILE
  )
)
## Rows: 2067 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): cell, batch
## dbl (14): louvain, leiden, x_tsne, y_tsne, x_fitsne, y_fitsne, x_umap_min_di...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Shared plotting settings. plot_embedding_highlight() also reads
# GEOM_POINT_SIZE, RASTERISED, x_column and y_column from the global
# environment, so they must be defined before it is called.
GEOM_POINT_SIZE <- 0.6
RASTERISED <- TRUE
x_column <- "x_umap_min_dist=0.1"
y_column <- "y_umap_min_dist=0.1"
EMBEDDING_TITLE_PREFIX <- "UMAP"

# Leiden clusters, labelled on the plot.
p_embedding_leiden <- plot_embedding(
  embedding = embedding_2067[, c(x_column, y_column)],
  color_values = embedding_2067$leiden |> as.factor(),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Leiden"),
  label_position = NULL,
  show_color_value_labels = TRUE,
  show_color_legend = FALSE,
  geom_point_size = GEOM_POINT_SIZE,
  sort_values = FALSE,
  shuffle_values = FALSE,
  rasterise = RASTERISED
) +
  theme_customized()

# Total counts per cell (log10).
p_embedding_UMI <- plot_embedding(
  embedding = embedding_2067[, c(x_column, y_column)],
  color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding_2067$cell])),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE * 1,
  geom_point_alpha = 1,
  sort_values = TRUE,
  shuffle_values = FALSE,
  label_size = 2.5,
  label_hjust = 0,
  label_vjust = 0,
  rasterise = RASTERISED,
  legend_size = 2,
  legend_ncol = 1
) +
  theme_customized(
    legend_key_size = 2,
    legend_text_size = 5
  )

# `mt_percentage` column of the combined cell metadata.
p_embedding_MT <- plot_embedding(
  embedding = embedding_2067[, c(x_column, y_column)],
  color_values = embedding_2067 |>
    dplyr::left_join(
      cell_metadata # |> dplyr::select(-batch)
    ) |>
    dplyr::pull(mt_percentage),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE * 1,
  geom_point_alpha = 1,
  sort_values = TRUE,
  shuffle_values = FALSE,
  label_size = 2.5,
  label_hjust = 0,
  label_vjust = 0,
  rasterise = RASTERISED,
  legend_size = 2,
  legend_ncol = 1
) +
  theme_customized(
    legend_key_size = 2,
    legend_text_size = 5
  )

# Expression of one feature, log10(CPM + 1).
selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_POU5F1 <- plot_embedding(
  embedding = embedding_2067[, c(x_column, y_column)],
  # Subset to the embedded cells before normalising, as the multi-feature
  # panels further down do, instead of running CPM over every loaded cell.
  # Assumes calc_cpm() normalises each cell (column) independently.
  color_values = log10(
    calc_cpm(matrix_readcount_use[, embedding_2067$cell])[selected_feature, ] + 1
  ),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {selected_feature}"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE,
  geom_point_alpha = 1,
  sort_values = TRUE,
  shuffle_values = FALSE,
  label_size = 2.5,
  label_hjust = 0,
  label_vjust = 0,
  # NOTE(review): the only panel not using RASTERISED - confirm intentional.
  rasterise = FALSE,
  legend_size = 2,
  legend_ncol = 1
) +
  theme_customized(
    legend_key_size = 2,
    legend_text_size = 5
  )

# Arrange the four panels in a 2-column grid.
purrr::reduce(
  list(
    p_embedding_leiden,
    p_embedding_UMI,
    p_embedding_MT,
    p_embedding_POU5F1
  ), `+`
) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
Summary
# Per-cluster summary: number of cells and median UMIs, detected genes and
# mitochondrial percentage.
embedding_2067 |>
  dplyr::left_join(
    cell_metadata
  ) |>
  dplyr::mutate(
    num_umis = Matrix::colSums(matrix_readcount_use[, cell]),
    num_genes = Matrix::colSums(matrix_readcount_use[, cell] > 0)
  ) |>
  dplyr::group_by(leiden) |>
  dplyr::summarise(
    num_cells = n(),
    median_umis = median(num_umis),
    median_genes = median(num_genes),
    meidan_MT = median(mt_percentage)
  ) |>
  gt::gt() |>
  # "median" is not a CSS font-size keyword; "medium" is the intended value.
  gt::tab_options(table.font.size = "medium")
leiden | num_cells | median_umis | median_genes | meidan_MT |
---|---|---|---|---|
0 | 205 | 2140756 | 5387.0 | 0.0529720842 |
1 | 182 | 2311908 | 5423.0 | 0.0404300979 |
2 | 173 | 1631691 | 4577.0 | 0.0488606864 |
3 | 156 | 2319072 | 6379.5 | 0.0413304999 |
4 | 152 | 1390714 | 3764.5 | 0.0543443623 |
5 | 149 | 855797 | 3103.0 | 0.0364863153 |
6 | 126 | 2646474 | 6842.5 | 0.0501413970 |
7 | 124 | 2202163 | 6414.5 | 0.0413531842 |
8 | 119 | 1673032 | 4618.0 | 0.0522041437 |
9 | 115 | 1849883 | 5760.0 | 0.0506782652 |
10 | 115 | 1738237 | 3665.0 | 0.0002466655 |
11 | 114 | 1201448 | 3621.0 | 0.0016033615 |
12 | 109 | 1327329 | 3660.0 | 0.0361322014 |
13 | 103 | 1507763 | 4685.0 | 0.0376887867 |
14 | 79 | 1519467 | 4506.0 | 0.0003062544 |
15 | 46 | 1677120 | 4610.0 | 0.0469316175 |
# Per-lineage summary: number of cells and median UMIs, detected genes and
# mitochondrial percentage.
embedding_2067 |>
  dplyr::left_join(
    cell_metadata
  ) |>
  dplyr::left_join(
    cell_metadata_PRJNA737139
  ) |>
  dplyr::mutate(
    num_umis = Matrix::colSums(matrix_readcount_use[, cell]),
    num_genes = Matrix::colSums(matrix_readcount_use[, cell] > 0)
  ) |>
  dplyr::group_by(lineage) |>
  dplyr::summarise(
    num_cells = n(),
    median_umis = median(num_umis),
    median_genes = median(num_genes),
    meidan_MT = median(mt_percentage)
  ) |>
  gt::gt() |>
  # "median" is not a CSS font-size keyword; "medium" is the intended value.
  gt::tab_options(table.font.size = "medium")
lineage | num_cells | median_umis | median_genes | meidan_MT |
---|---|---|---|---|
Naive H9 | 143 | 2322872 | 6187.0 | 0.03970047 |
Primed H9 | 124 | 2202163 | 6414.5 | 0.04135318 |
TSCs | 128 | 2642517 | 6831.0 | 0.05028367 |
Blastoid_24h | 223 | 2036212 | 5169.0 | 0.04553170 |
Blastoid_60h | 460 | 1964516 | 4855.5 | 0.03979052 |
Blastoid_96h | 989 | 1465353 | 4264.0 | 0.04480325 |
# One panel per lineage level of the Kagawa et al. metadata, drawn on the
# 2067-cell embedding in a 2-column layout.
plot_embedding_highlight(
embedding = embedding_2067,
x = cell_metadata_PRJNA737139,
y = "lineage",
label = "Kagawa et al. 2021",
n_cols = 2
)
# Features to display, in three groups of four; with `byrow = FALSE` and three
# columns below, each group fills one column of the figure.
FEATURES_SELECTED <- c(
  "ENSG00000171872_KLF17",
  "ENSG00000204531_POU5F1",
  "ENSG00000111704_NANOG",
  "ENSG00000186103_ARGFX",
  #
  "ENSG00000164736_SOX17",
  "ENSG00000125798_FOXA2",
  "ENSG00000136574_GATA4",
  "ENSG00000134853_PDGFRA",
  #
  "ENSG00000179348_GATA2",
  "ENSG00000070915_SLC12A3",
  "ENSG00000165556_CDX2",
  "ENSG00000007866_TEAD3"
)

# local() keeps the normalised matrix out of the global environment.
local({
  # The CPM matrix does not depend on the feature: compute it once instead of
  # once per panel.
  cpm_embedded <- calc_cpm(matrix_readcount_use[, embedding_2067$cell])

  purrr::map(FEATURES_SELECTED, \(selected_feature) {
    cat(selected_feature, "\n")
    values <- log10(cpm_embedded[selected_feature, ] + 1)

    plot_embedding(
      embedding = embedding_2067[, c(x_column, y_column)],
      color_values = values,
      label = paste(
        EMBEDDING_TITLE_PREFIX,
        # Drop the leading Ensembl id, keeping the gene symbol.
        selected_feature |> stringr::str_remove(pattern = "^E.+_"),
        sep = "; "
      ),
      label_position = NULL,
      show_color_value_labels = FALSE,
      show_color_legend = TRUE,
      geom_point_size = GEOM_POINT_SIZE,
      sort_values = TRUE,
      shuffle_values = FALSE,
      rasterise = RASTERISED,
      legend_size = 2
    ) +
      ggplot2::scale_color_viridis_c(
        na.value = "grey80"
      ) +
      theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
      )
  }) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3, byrow = FALSE) +
    patchwork::plot_annotation(
      theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )
})
## ENSG00000171872_KLF17
## ENSG00000204531_POU5F1
## ENSG00000111704_NANOG
## ENSG00000186103_ARGFX
## ENSG00000164736_SOX17
## ENSG00000125798_FOXA2
## ENSG00000136574_GATA4
## ENSG00000134853_PDGFRA
## ENSG00000179348_GATA2
## ENSG00000070915_SLC12A3
## ENSG00000165556_CDX2
## ENSG00000007866_TEAD3
Instead of only including a subset of each dataset, this embedding includes all the cells mentioned in their respective publications to cover the full spectrum of cellular diversity.
Included cells and their lineage annotations are the same as in their respective publications. See here for details.
# Precomputed embedding of the four integrated datasets (Scanpy + Harmony).
EMBEDDING_FILE <- glue::glue("embedding_ncomponents18_ccc1_seed20210719.csv.gz")
embedding <- vroom::vroom(
  file.path(
    PROJECT_DIR, "raw", "public", "PRJNA737139",
    "clustering", "PRJNA737139_PRJEB11202_PRJNA431392_PRJEB40781",
    "exploring", "Scanpy_Harmony_variable",
    EMBEDDING_FILE
  )
)
head(embedding)
# BioProject accession -> citation lookup, one row per study (the
# PRJNA737139 entry used to be listed twice).
studies <- tibble::tribble(
  ~bioproject, ~citation,
  "PRJEB11202", "Petropoulos et al. 2016",
  "PRJEB40781", "Tyser et al. 2021",
  "PRJNA431392", "Zhou et al. 2019",
  "PRJNA737139", "Kagawa et al. 2021",
  "PRJNA632839", "Yu et al. 2021",
  "PRJNA658478", "Liu et al. 2021",
  "PRJNA720968", "Yanagida et al. 2021",
  "PRJNA667174", "Fan et al. 2021",
  "PRJNA738498", "Sozen et al. 2021"
)
# Named character vector: names are accessions, values are citations.
studies <- setNames(
  object = studies$citation,
  nm = studies$bioproject
)

# Number of cells per study in the integrated embedding, with a total row.
embedding |>
  dplyr::count(batch, name = "num_cells") |>
  dplyr::mutate(
    study = studies[batch]
  ) |>
  gt::gt() |>
  # "median" is not a CSS font-size keyword; "medium" is the intended value.
  gt::tab_options(table.font.size = "medium") |>
  gt::summary_rows(
    columns = c(num_cells),
    fns = list(
      Sum = ~ sum(.)
    ),
    decimals = 0
  )
batch | num_cells | study | |
---|---|---|---|
PRJEB11202 | 1529 | Petropoulos et al. 2016 | |
PRJEB40781 | 1195 | Tyser et al. 2021 | |
PRJNA431392 | 5911 | Zhou et al. 2019 | |
PRJNA737139 | 2067 | Kagawa et al. 2021 | |
Sum | — | 10,702 | — |
# PRJNA431392; Zhou et al. 2019
# Embryos whose cells are flagged "1" in the `3184` column below.
embryos_selected_PRJNA431392 <- c(
  "ha_D6_E2", "hm_D6_E1", "hm_D6_E2",
  "hm_D8_E2", "hm_D8_E3", "hm_D8_E5",
  "ha_D8_E1", "hm_D8_E1",
  "hv_D8_E1", "hv_D8_E2", "hv_D8_E3",
  "hv_D10_E6",
  "ha_D10_E1", "ha_D10_E2",
  "hm_D10_E4", "hm_D10_E9",
  "hv_D10_E7", "hv_D10_E8",
  "ha_D12_E1",
  "hv_D12_E1", "hv_D12_E2"
)

cell_metadata_PRJNA431392 <- file.path(
  PROJECT_DIR, "raw", "public", "PRJNA431392", "matrix/cell_metadata.csv"
) |>
  vroom::vroom() |>
  dplyr::mutate(
    # Order the stages numerically rather than alphabetically.
    developmental_stage = factor(
      Day,
      levels = stringr::str_sort(unique(Day), numeric = TRUE)
    ),
    lineage = factor(Lineage, levels = c("EPI", "PE", "TE", "MIX")),
    # "1" for cells of the selected embryos, "0" for all others.
    `3184` = dplyr::if_else(
      Ori_Day_Emb %in% embryos_selected_PRJNA431392, "1", "0"
    )
  ) |>
  dplyr::rename(cell = Sample)
# PRJEB40781; Tyser et al. 2021
cell_metadata_PRJEB40781 <- file.path(
  PROJECT_DIR, "raw", "public", "PRJEB40781", "matrix/cell_metadata.csv"
) |>
  vroom::vroom()
# Convert the annotation columns to factors with default (sorted) levels.
cell_metadata_PRJEB40781 <- dplyr::mutate(
  cell_metadata_PRJEB40781,
  lineage = factor(lineage),
  lineage_ontology = factor(lineage_ontology),
  sampling_site = factor(sampling_site)
)
# PRJEB11202; Petropoulos et al. 2016
cell_metadata_PRJEB11202 <- file.path(
  PROJECT_DIR, "raw", "public", "PRJEB11202", "matrix/cell_metadata.csv"
) |>
  vroom::vroom() |>
  dplyr::mutate(
    lineage = factor(
      inferred_lineage,
      levels = c(
        "epiblast", "primitive_endoderm", "trophectoderm", "not_applicable"
      )
    ),
    # The stage is the part of `individual` before the first dot ...
    developmental_stage = stringr::str_remove(
      string = individual,
      pattern = "\\..+$"
    ),
    # ... and is ordered numerically rather than alphabetically.
    developmental_stage = factor(
      developmental_stage,
      levels = stringr::str_sort(unique(developmental_stage), numeric = TRUE)
    )
  ) |>
  dplyr::select(cell, lineage, developmental_stage)
# Overview of the integrated embedding: clusters, study of origin, total
# counts and mitochondrial percentage.
embedding_coordinates <- embedding[, c(x_column, y_column)]

# Leiden clusters, labelled on the plot.
p_embedding_leiden <- plot_embedding(
  embedding = embedding_coordinates,
  color_values = as.factor(embedding$leiden),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Leiden"),
  label_position = NULL,
  show_color_value_labels = TRUE,
  show_color_legend = FALSE,
  geom_point_size = GEOM_POINT_SIZE,
  sort_values = FALSE,
  shuffle_values = FALSE,
  rasterise = RASTERISED
) +
  theme_customized()

# Total counts per cell (log10).
p_embedding_UMI <- plot_embedding(
  embedding = embedding_coordinates,
  color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding$cell])),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE,
  geom_point_alpha = 1,
  sort_values = TRUE,
  shuffle_values = FALSE,
  label_size = 2.5,
  label_hjust = 0,
  label_vjust = 0,
  rasterise = RASTERISED,
  legend_size = 2,
  legend_ncol = 1
) +
  theme_customized(legend_key_size = 2, legend_text_size = 5)

# `mt_percentage` column of the combined cell metadata.
p_embedding_MT <- plot_embedding(
  embedding = embedding_coordinates,
  color_values = dplyr::pull(
    dplyr::left_join(embedding, cell_metadata),
    mt_percentage
  ),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE,
  geom_point_alpha = 1,
  sort_values = TRUE,
  shuffle_values = FALSE,
  label_size = 2.5,
  label_hjust = 0,
  label_vjust = 0,
  rasterise = RASTERISED,
  legend_size = 2,
  legend_ncol = 1
) +
  theme_customized(legend_key_size = 2, legend_text_size = 5)

# Study of origin; smaller, shuffled points, with citations as legend labels.
p_embedding_batch <- plot_embedding(
  embedding = embedding_coordinates,
  color_values = as.factor(embedding$batch),
  label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Batch"),
  label_position = NULL,
  show_color_value_labels = FALSE,
  show_color_legend = TRUE,
  geom_point_size = GEOM_POINT_SIZE / 2,
  sort_values = FALSE,
  shuffle_values = TRUE,
  rasterise = RASTERISED,
  legend_size = 2
) +
  theme_customized(legend_key_size = 2, legend_text_size = 5) +
  scale_color_manual(
    values = scales::hue_pal()(n = length(unique(embedding$batch))),
    labels = studies
  )

# Arrange the four panels in a 2-column grid.
p_embedding_leiden +
  p_embedding_batch +
  p_embedding_UMI +
  p_embedding_MT +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
# One panel per study: its cells in salmon, all other cells in light grey; the
# title carries the citation and the number of cells of the study.
purrr::map(unique(embedding$batch), \(batch_id) {
  in_batch <- as.integer(embedding$batch == batch_id)
  num_cells_in_batch <- sum(in_batch, na.rm = TRUE)

  plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = as.factor(in_batch),
    label = glue::glue(
      "{EMBEDDING_TITLE_PREFIX}; {studies[batch_id]}, {num_cells_in_batch}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
  ) +
    theme_customized(legend_key_size = 2, legend_text_size = 5) +
    scale_color_manual(values = c("grey70", "salmon"))
}) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
#' Colour the integrated embedding by one metadata column of one study
#'
#' Replaces the per-column copies of the same `plot_embedding()` + theme +
#' manual colour scale stanza. Cells without a match in `cell_metadata` get
#' `NA` and are drawn in `na_color`. Reads `x_column`, `y_column`,
#' `EMBEDDING_TITLE_PREFIX`, `GEOM_POINT_SIZE` and `RASTERISED` from the
#' global environment.
#'
#' @param embedding Data frame with the coordinates and the join key column(s).
#' @param cell_metadata Metadata of one study, joined by all shared columns.
#' @param column Name (string) of the metadata column to colour by.
#' @param study Citation shown in the panel title.
#' @param palette Colours of the non-missing values; by default one hue per
#'   unique value of `cell_metadata[[column]]`.
#' @param na_color Colour of cells without a value.
#' @return A ggplot object.
plot_embedding_metadata <- function(embedding,
                                    cell_metadata,
                                    column,
                                    study,
                                    palette = NULL,
                                    na_color = "#7F7F7F") {
  if (is.null(palette)) {
    palette <- scales::hue_pal()(n = length(unique(cell_metadata[[column]])))
  }
  # "developmental_stage" -> "Developmental stage" (first underscore only).
  column_title <- column |>
    stringr::str_to_title() |>
    stringr::str_replace(pattern = "_", replacement = " ")

  plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
      dplyr::left_join(cell_metadata) |>
      dplyr::pull(.data[[column]]),
    label = glue::glue(
      "{EMBEDDING_TITLE_PREFIX}; {study}; ",
      "{column_title}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
  ) +
    theme_customized(
      legend_key_size = 2,
      legend_text_size = 5
    ) +
    scale_color_manual(
      na.translate = TRUE,
      values = palette,
      na.value = na_color
    )
}

# ---- PRJEB11202; Petropoulos et al. 2016 ----
bioproject <- "PRJEB11202"
cell_metadata_selected <- cell_metadata_PRJEB11202

p_embedding_developmental_stage <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "developmental_stage",
  study = studies[bioproject]
)
p_embedding_lineage <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "lineage",
  study = studies[bioproject]
)
list(
  p_embedding_lineage,
  p_embedding_developmental_stage
) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
Salmon: highlighted group of cells; Light grey: cells belonging to this dataset but not the highlighted group; Dark grey: cells belonging to other datasets.
plot_embedding_highlight(
  embedding = embedding,
  x = cell_metadata_selected,
  y = "lineage",
  label = studies[bioproject],
  n_cols = 2
)
plot_embedding_highlight(
  embedding = embedding,
  x = cell_metadata_selected,
  y = "developmental_stage",
  label = studies[bioproject],
  n_cols = 2
)

# ---- PRJNA431392; Zhou et al. 2019 ----
bioproject <- "PRJNA431392"
cell_metadata_selected <- cell_metadata_PRJNA431392

p_embedding_developmental_stage <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "developmental_stage",
  study = studies[bioproject]
)
p_embedding_lineage <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "lineage",
  study = studies[bioproject]
)
# The `3184` column is a "0"/"1" flag, so it uses the highlight colours.
p_embedding_chosen <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "3184",
  study = studies[bioproject],
  palette = c("grey70", "salmon")
)
list(
  p_embedding_lineage,
  p_embedding_developmental_stage,
  p_embedding_chosen
) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
plot_embedding_highlight(
  embedding = embedding,
  x = cell_metadata_selected,
  y = "developmental_stage",
  label = studies[bioproject],
  n_cols = 2
)

# ---- PRJEB40781; Tyser et al. 2021 ----
bioproject <- "PRJEB40781"
cell_metadata_selected <- cell_metadata_PRJEB40781

# NOTE(review): this is the only panel whose missing-value colour is "grey70"
# rather than "#7F7F7F"; kept as in the original - confirm it is intentional.
p_embedding_sampling_site <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "sampling_site",
  study = studies[bioproject],
  na_color = "grey70"
)
p_embedding_lineage <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "lineage",
  study = studies[bioproject]
)
list(
  p_embedding_lineage,
  p_embedding_sampling_site
) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
plot_embedding_highlight(
  embedding = embedding,
  x = cell_metadata_selected,
  y = "lineage",
  label = studies[bioproject],
  n_cols = 2
)

# ---- PRJNA737139; Kagawa et al. 2021 ----
bioproject <- "PRJNA737139"
cell_metadata_selected <- cell_metadata_PRJNA737139

p_embedding_source <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "source_name",
  study = studies[bioproject]
)
p_embedding_lineage <- plot_embedding_metadata(
  embedding = embedding,
  cell_metadata = cell_metadata_selected,
  column = "lineage",
  study = studies[bioproject]
)
list(
  p_embedding_source,
  p_embedding_lineage
) |>
  purrr::reduce(`+`) +
  patchwork::plot_layout(ncol = 2) +
  patchwork::plot_annotation(
    theme = ggplot2::theme(plot.margin = ggplot2::margin())
  )
plot_embedding_highlight(
  embedding = embedding,
  x = cell_metadata_selected,
  y = "lineage",
  label = studies[bioproject],
  n_cols = 2
)

# The original code left this global set to the last plotted column; keep it
# for any later code that reads it.
selected_column <- "lineage"
# Record the platform part of the session information for reproducibility.
devtools::session_info()$platform
## setting value
## version R version 4.1.2 (2021-11-01)
## os macOS Monterey 12.1
## system aarch64, darwin20.6.0
## ui unknown
## language (EN)
## collate en_US.UTF-8
## ctype en_US.UTF-8
## tz America/Chicago
## date 2022-01-18
## pandoc 2.14.0.3 @ /Applications/RStudio.app/Contents/MacOS/pandoc/ (via rmarkdown)
# Record the loaded packages and their versions as a table.
# `$packages` is spelled out: `$pack` only worked through partial matching.
devtools::session_info()$packages |>
  tibble::as_tibble() |>
  dplyr::select(
    package,
    loadedversion,
    date,
    `source`
  ) |>
  # print(n = nrow(.))
  gt::gt() |>
  # "median" is not a CSS font-size keyword; "medium" is the intended value.
  gt::tab_options(table.font.size = "medium")
package | loadedversion | date | source |
---|---|---|---|
assertthat | 0.2.1 | 2019-03-21 | CRAN (R 4.1.1) |
backports | 1.4.1 | 2021-12-13 | CRAN (R 4.1.2) |
beeswarm | 0.4.0 | 2021-06-01 | CRAN (R 4.1.2) |
bit | 4.0.4 | 2020-08-04 | CRAN (R 4.1.1) |
bit64 | 4.0.5 | 2020-08-30 | CRAN (R 4.1.1) |
brio | 1.1.3 | 2021-11-30 | CRAN (R 4.1.2) |
broom | 0.7.11 | 2022-01-03 | CRAN (R 4.1.2) |
bslib | 0.3.1 | 2021-10-06 | CRAN (R 4.1.1) |
cachem | 1.0.6 | 2021-08-19 | CRAN (R 4.1.1) |
callr | 3.7.0 | 2021-04-20 | CRAN (R 4.1.1) |
cellranger | 1.1.0 | 2016-07-27 | CRAN (R 4.1.1) |
checkmate | 2.0.0 | 2020-02-06 | CRAN (R 4.1.1) |
cli | 3.1.0 | 2021-10-27 | CRAN (R 4.1.1) |
colorspace | 2.0-2 | 2021-06-24 | CRAN (R 4.1.1) |
crayon | 1.4.2 | 2021-10-29 | CRAN (R 4.1.1) |
curl | 4.3.2 | 2021-06-23 | CRAN (R 4.1.1) |
data.table | 1.14.2 | 2021-09-27 | CRAN (R 4.1.1) |
DBI | 1.1.2 | 2021-12-20 | CRAN (R 4.1.2) |
dbplyr | 2.1.1 | 2021-04-06 | CRAN (R 4.1.1) |
desc | 1.4.0 | 2021-09-28 | CRAN (R 4.1.1) |
devtools | 2.4.3.9000 | 2022-01-15 | Github (r-lib/devtools@e2f25cd69031c8d2099106baed894df4109cb7a4) |
digest | 0.6.29 | 2021-12-01 | CRAN (R 4.1.2) |
dplyr | 1.0.7.9000 | 2022-01-12 | Github (tidyverse/dplyr@05013358ace44fe17a51395d49d384232d18d6c1) |
dtplyr | 1.2.0 | 2021-12-05 | CRAN (R 4.1.2) |
ellipsis | 0.3.2 | 2021-04-29 | CRAN (R 4.1.1) |
evaluate | 0.14 | 2019-05-28 | CRAN (R 4.1.1) |
extrafont | 0.17 | 2014-12-08 | CRAN (R 4.1.1) |
extrafontdb | 1.0 | 2012-06-11 | CRAN (R 4.1.1) |
fansi | 1.0.2 | 2022-01-14 | CRAN (R 4.1.2) |
farver | 2.1.0 | 2021-02-28 | CRAN (R 4.1.1) |
fastmap | 1.1.0 | 2021-01-25 | CRAN (R 4.1.1) |
forcats | 0.5.1.9000 | 2021-11-29 | Github (tidyverse/forcats@b4dade0636a46543c30b0b647d97c3ce697c0ada) |
fs | 1.5.2.9000 | 2021-12-09 | Github (r-lib/fs@6d1182fea7e1c1ddbef3b0bba37c0b0a2e09749c) |
gargle | 1.2.0 | 2021-07-02 | CRAN (R 4.1.1) |
generics | 0.1.1 | 2021-10-25 | CRAN (R 4.1.1) |
ggbeeswarm | 0.6.0 | 2017-08-07 | CRAN (R 4.1.2) |
ggplot2 | 3.3.5 | 2021-06-25 | CRAN (R 4.1.1) |
ggrastr | 1.0.1 | 2021-12-08 | Github (VPetukhov/ggrastr@7aed9af2b9cffabda86e6d2af2fa10d4e60cc63d) |
glue | 1.6.0.9000 | 2021-12-21 | Github (tidyverse/glue@76793ef2c376140350c0e1909e66fd404a52b1ef) |
googledrive | 2.0.0 | 2021-07-08 | CRAN (R 4.1.1) |
googlesheets4 | 1.0.0 | 2021-07-21 | CRAN (R 4.1.1) |
gt | 0.3.1.9000 | 2022-01-17 | Github (rstudio/gt@fcabb414c55b70c9e445fbedfb24d52fe394ba61) |
gtable | 0.3.0.9000 | 2021-10-28 | Github (r-lib/gtable@a0bd2721a0a31c8b4391b84aabe98f8c85881140) |
haven | 2.4.3 | 2021-08-04 | CRAN (R 4.1.1) |
highr | 0.9 | 2021-04-16 | CRAN (R 4.1.1) |
hms | 1.1.1 | 2021-09-26 | CRAN (R 4.1.1) |
htmltools | 0.5.2 | 2021-08-25 | CRAN (R 4.1.1) |
httr | 1.4.2 | 2020-07-20 | CRAN (R 4.1.1) |
jquerylib | 0.1.4 | 2021-04-26 | CRAN (R 4.1.1) |
jsonlite | 1.7.3 | 2022-01-17 | CRAN (R 4.1.2) |
knitr | 1.37.1 | 2021-12-21 | https://yihui.r-universe.dev (R 4.1.2) |
labeling | 0.4.2 | 2020-10-20 | CRAN (R 4.1.1) |
lattice | 0.20-45 | 2021-09-22 | CRAN (R 4.1.2) |
lifecycle | 1.0.1 | 2021-09-24 | CRAN (R 4.1.1) |
lubridate | 1.8.0 | 2022-01-15 | Github (tidyverse/lubridate@53e5892a548b3425d6c3bf887542aa105341ab73) |
magrittr | 2.0.1 | 2020-11-17 | CRAN (R 4.1.1) |
Matrix | 1.4-0 | 2021-12-08 | CRAN (R 4.1.2) |
memoise | 2.0.1 | 2021-11-26 | CRAN (R 4.1.2) |
modelr | 0.1.8.9000 | 2021-10-27 | Github (tidyverse/modelr@16168e0624215d9d1a008f3a85de30aeb75302f6) |
munsell | 0.5.0 | 2018-06-12 | CRAN (R 4.1.1) |
patchwork | 1.1.0.9000 | 2021-10-27 | Github (thomasp85/patchwork@79223d3002e7bd7e715a270685c6507d684b2622) |
pillar | 1.6.4 | 2021-10-18 | CRAN (R 4.1.1) |
pkgbuild | 1.3.1 | 2021-12-20 | CRAN (R 4.1.2) |
pkgconfig | 2.0.3 | 2019-09-22 | CRAN (R 4.1.1) |
pkgload | 1.2.4 | 2021-11-30 | CRAN (R 4.1.2) |
png | 0.1-7 | 2013-12-03 | CRAN (R 4.1.1) |
prettyunits | 1.1.1 | 2020-01-24 | CRAN (R 4.1.1) |
processx | 3.5.2 | 2021-04-30 | CRAN (R 4.1.1) |
ps | 1.6.0 | 2021-02-28 | CRAN (R 4.1.1) |
purrr | 0.3.4 | 2020-04-17 | CRAN (R 4.1.1) |
R.cache | 0.15.0 | 2021-04-30 | CRAN (R 4.1.1) |
R.methodsS3 | 1.8.1 | 2020-08-26 | CRAN (R 4.1.1) |
R.oo | 1.24.0 | 2020-08-26 | CRAN (R 4.1.1) |
R.utils | 2.11.0 | 2021-09-26 | CRAN (R 4.1.1) |
R6 | 2.5.1.9000 | 2021-12-09 | Github (r-lib/R6@1b05b89f30fe6713cb9ff51d91fc56bd3016e4b2) |
ragg | 1.2.1.9000 | 2021-12-08 | Github (r-lib/ragg@c68c6665ef894f16c006333658b32bf25d2e9d19) |
Rcpp | 1.0.8 | 2022-01-13 | CRAN (R 4.1.2) |
readr | 2.1.1 | 2021-11-30 | CRAN (R 4.1.2) |
readxl | 1.3.1.9000 | 2022-01-06 | Github (tidyverse/readxl@1059a768436df38fb7b33f8e0415c18585c11ac5) |
remotes | 2.4.2 | 2021-12-02 | Github (r-lib/remotes@fcad17b68b7a19d5363d64adfb0a0426a3a5b3db) |
reprex | 2.0.1 | 2021-08-05 | CRAN (R 4.1.1) |
reticulate | 1.23 | 2022-01-14 | CRAN (R 4.1.2) |
rlang | 0.99.0.9003 | 2022-01-17 | Github (r-lib/rlang@a165f49e841dc848fb01133dd265a83c5a671013) |
rmarkdown | 2.11.8 | 2022-01-15 | Github (rstudio/rmarkdown@52a65394a6e1ab84b10feccf893e2a16ee25aeb3) |
rprojroot | 2.0.2 | 2020-11-15 | CRAN (R 4.1.1) |
rstudioapi | 0.13.0-9000 | 2022-01-15 | Github (rstudio/rstudioapi@5d0f0873dc160779c71bf4b00d8b016b898f6fb5) |
Rttf2pt1 | 1.3.9 | 2021-07-22 | CRAN (R 4.1.1) |
rvest | 1.0.2 | 2021-10-16 | CRAN (R 4.1.1) |
sass | 0.4.0 | 2021-05-12 | CRAN (R 4.1.1) |
scales | 1.1.1 | 2020-05-11 | CRAN (R 4.1.1) |
sessioninfo | 1.2.2 | 2021-12-06 | CRAN (R 4.1.2) |
stringi | 1.7.6 | 2021-11-29 | CRAN (R 4.1.2) |
stringr | 1.4.0.9000 | 2022-01-17 | Github (tidyverse/stringr@3848cd70b1e331e6c20401e4da518ff4c3725324) |
styler | 1.6.2.9000 | 2022-01-17 | Github (r-lib/styler@9274aed613282eca01909ae8c341224055d9c928) |
systemfonts | 1.0.3.9000 | 2021-12-07 | Github (r-lib/systemfonts@414114e645efb316def3d8de1056d855f92d588e) |
testthat | 3.1.1.9000 | 2022-01-13 | Github (r-lib/testthat@f09df60dd881530332b252474e9f35c97f8640be) |
textshaping | 0.3.6 | 2021-10-13 | CRAN (R 4.1.1) |
tibble | 3.1.6.9000 | 2021-12-30 | Github (tidyverse/tibble@2d8d3fddda2b879a82cb58086f2107da4f04befa) |
tidyr | 1.1.4 | 2021-09-27 | CRAN (R 4.1.1) |
tidyselect | 1.1.1 | 2021-04-30 | CRAN (R 4.1.1) |
tidyverse | 1.3.1.9000 | 2021-12-08 | Github (tidyverse/tidyverse@6186fbf09bf359110f8800ff989cbbdd40485eb0) |
tzdb | 0.2.0 | 2021-10-27 | CRAN (R 4.1.1) |
usethis | 2.1.5.9000 | 2022-01-17 | Github (r-lib/usethis@16d14b148cca803e15be4b33a7453a69e8d6fc29) |
utf8 | 1.2.2 | 2021-07-24 | CRAN (R 4.1.1) |
vctrs | 0.3.8 | 2021-04-29 | CRAN (R 4.1.1) |
vipor | 0.4.5 | 2017-03-22 | CRAN (R 4.1.2) |
viridisLite | 0.4.0 | 2021-04-13 | CRAN (R 4.1.1) |
vroom | 1.5.7 | 2021-11-30 | CRAN (R 4.1.2) |
withr | 2.4.3 | 2021-11-30 | CRAN (R 4.1.2) |
xfun | 0.29 | 2021-12-14 | CRAN (R 4.1.2) |
xml2 | 1.3.3 | 2021-11-30 | CRAN (R 4.1.2) |
yaml | 2.2.1 | 2020-02-01 | CRAN (R 4.1.1) |