Kagawa, H., Javali, A., Khoei, H.H., Sommer, T.M., Sestini, G., Novatchkova, M., Scholte Op Reimer, Y., Castel, G., Bruneau, A., Maenhoudt, N., et al. (2021). Human blastoids model blastocyst development and implantation. Nature 1–9.

BioProject Accession: PRJNA737139
GEO Accession: GSE177689

Load required packages.

library(tidyverse)
library(Matrix)
library(patchwork)
library(extrafont)

Sys.time()

## [1] "2022-01-18 00:33:31 CST"

Data preparation

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

plot_embedding_highlight <- function(embedding, x, y, label, n_cols = 3) {
    cell_metadata_selected <- x
    selected_column <- y

    purrr::map(levels(cell_metadata_selected[[selected_column]]), \(x) {
        values <- embedding |>
            dplyr::left_join(cell_metadata_selected) |>
            dplyr::mutate(
                value = case_when(
                    .data[[selected_column]] == x ~ "1",
                    .data[[selected_column]] != x ~ "0"
                )
            ) |>
            dplyr::pull(value) |>
            as.integer() |>
            as.factor()

        plot_embedding(
            embedding = embedding[, c(x_column, y_column)],
            color_values = values,
            label = glue::glue(
                "{label}; ",
                "{x}: {sum(as.integer(as.character(values)), na.rm = TRUE)}"
            ),
            label_position = NULL,
            show_color_value_labels = FALSE,
            show_color_legend = FALSE,
            geom_point_size = GEOM_POINT_SIZE,
            sort_values = TRUE,
            shuffle_values = FALSE,
            rasterise = RASTERISED,
            legend_size = 2
        ) +
            theme_customized(
                legend_key_size = 2,
                legend_text_size = 5
            ) +
            scale_color_manual(
                na.translate = TRUE,
                values = c("grey70", "salmon"),
                na.value = "#7F7F7F"
            ) +
            ggplot2::annotate(
                geom = "text",
                x = Inf,
                y = Inf,
                label = sum(as.integer(as.character(values)), na.rm = TRUE),
                size = 5 / ggplot2::.pt,
                hjust = 1,
                vjust = 1,
                na.rm = FALSE
            )
    }) |>
        purrr::reduce(`+`) +
        patchwork::plot_layout(ncol = n_cols) +
        patchwork::plot_annotation(
            theme = ggplot2::theme(plot.margin = ggplot2::margin())
        )
}

Data loading

PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation"

Matrix

Raw fastq files of these datasets were downloaded and re-processed as described in Yu et al. 2021 to minimize platform and processing differences.

ad <- reticulate::import(module = "anndata", convert = TRUE)
print(ad$`__version__`)

## [1] "0.7.6"

adata_files <- purrr::map(c("PRJNA737139", "PRJEB11202", "PRJNA431392", "PRJEB40781"), \(x) {
    file.path(
        PROJECT_DIR,
        "raw",
        "public",
        x,
        "matrix",
        "adata.h5ad"
    )
})
purrr::map_lgl(adata_files, file.exists)

## [1] TRUE TRUE TRUE TRUE

BACKED <- NULL
matrix_readcount_use <- purrr::map(adata_files, function(x) {
    ad$read_h5ad(
        filename = x, backed = BACKED
    ) |>
        convert_adata()
}) |>
    purrr::reduce(cbind)

matrix_readcount_use |> dim()

## [1] 33538 10702

Metadata

BACKED <- "r"
cell_metadata <- purrr::map(adata_files, function(x) {
    ad$read_h5ad(
        filename = x, backed = BACKED
    )$obs |>
        tibble::rownames_to_column(var = "cell") |>
        dplyr::select(cell, everything())
}) |>
    dplyr::bind_rows()

cell_metadata |> head()

Check memory usage.

purrr::walk(list(matrix_readcount_use, cell_metadata), \(x) {
    print(object.size(x), units = "auto", standard = "SI")
})

## 922.6 MB
## 1.1 MB

Clustering with PSCs and TSCs

Cell metadata of Kagawa et al. 2021

cell_metadata_PRJNA737139 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw",
        "public",
        "PRJNA737139",
        "matrix/cell_metadata.csv"
    )
) |>
    dplyr::mutate(
        lineage = factor(
            lineage,
            levels = c(
                "Naive H9",
                "Primed H9",
                "TSCs",
                "Blastoid_24h",
                "Blastoid_60h",
                "Blastoid_96h"
            )
        ),
        source_name = factor(
            source_name
        )
    )

cell_metadata_PRJNA737139 |> head()

The expression matrix was re-generated from raw fastq files. To make this analysis comparable with the original publication, we retrieved the cell ids from the deposited dataset.

Retrieve the cell metadata from Gene Expression Omnibus.

cell_metadata_PRJNA737139_author <- vroom::vroom(
    file = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE177nnn/GSE177689/suppl/GSE177689_blastoid.H9.okae_bts5__scRNAseq.unfiltered__metadata.tsv.gz"
)

## Rows: 2715 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: "\t"
## chr (2): samplename, treatment
## dbl (7): sampleid, nFeature_RNA, percent.mt, blastoid.Fig2b.lowres, blastoid...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

cell_metadata_PRJNA737139_author |> head()

num_cells_used_in_Fig2b <- cell_metadata_PRJNA737139_author |>
    dplyr::filter(
        !is.na(blastoid.Fig2b.lowres)
    ) |>
    nrow()

print(
    glue::glue("Total number of cells deposited: {nrow(cell_metadata_PRJNA737139_author)}"),
    glue::glue("Number of cells used in Fig. 2b: {num_cells_used_in_Fig2b}")
)

## Total number of cells deposited: 2715
## Number of cells used in Fig. 2b: 2067

num_cells_deposited_blastoid <- cell_metadata_PRJNA737139_author |>
    dplyr::filter(
        stringr::str_starts(string = treatment, pattern = "PALLY"),
    ) |>
    nrow()

num_cells_used_in_Fig2b_blastoid <- cell_metadata_PRJNA737139_author |>
    dplyr::filter(
        stringr::str_starts(string = treatment, pattern = "PALLY"),
        !is.na(blastoid.Fig2b.lowres)
    ) |>
    nrow()

print(
    glue::glue("Total number of blastoid cells deposited: {num_cells_deposited_blastoid}"),
    glue::glue("Number of blastoid cells used in Fig. 2b: {num_cells_used_in_Fig2b_blastoid}")
)

## Total number of blastoid cells deposited: 2311
## Number of blastoid cells used in Fig. 2b: 1672

2067 out of total 2715 deposited cells (73.1%), and 1672 out of 2311 deposited blastoid cells (72.3%) are used in Fig. 2a-d.

Visualization

Embedding

EMBEDDING_FILE <- glue::glue(
    "embedding_ncomponents10_ccc1_seed20210719.csv.gz"
)

embedding_2067 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw",
        "public",
        "PRJNA737139",
        "clustering",
        "PRJNA737139",
        "exploring",
        "Scanpy_Harmony",
        EMBEDDING_FILE
    )
)

## Rows: 2067 Columns: 16
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (2): cell, batch
## dbl (14): louvain, leiden, x_tsne, y_tsne, x_fitsne, y_fitsne, x_umap_min_di...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Clustering

GEOM_POINT_SIZE <- 0.6
RASTERISED <- TRUE
x_column <- "x_umap_min_dist=0.1"
y_column <- "y_umap_min_dist=0.1"

EMBEDDING_TITLE_PREFIX <- "UMAP"

p_embedding_leiden <- plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column)],
    color_values = embedding_2067$leiden |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Leiden"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = FALSE,
    shuffle_values = FALSE,
    rasterise = RASTERISED
) +
    theme_customized()

p_embedding_UMI <- plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column)],
    color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding_2067$cell])),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE * 1,
    geom_point_alpha = 1,
    sort_values = TRUE,
    shuffle_values = FALSE,
    label_size = 2.5,
    label_hjust = 0,
    label_vjust = 0,
    rasterise = RASTERISED,
    legend_size = 2,
    legend_ncol = 1
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    )

p_embedding_MT <- plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column)],
    color_values = embedding_2067 |>
        dplyr::left_join(
            cell_metadata # |> dplyr::select(-batch)
        ) |>
        dplyr::pull(mt_percentage),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE * 1,
    geom_point_alpha = 1,
    sort_values = TRUE,
    shuffle_values = FALSE,
    label_size = 2.5,
    label_hjust = 0,
    label_vjust = 0,
    rasterise = RASTERISED,
    legend_size = 2,
    legend_ncol = 1
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    )

selected_feature <- "ENSG00000204531_POU5F1"
p_embedding_POU5F1 <- plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column)],
    color_values = log10(calc_cpm(matrix_readcount_use)[selected_feature, embedding_2067$cell] + 1),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {selected_feature}"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    geom_point_alpha = 1,
    sort_values = TRUE,
    shuffle_values = FALSE,
    label_size = 2.5,
    label_hjust = 0,
    label_vjust = 0,
    rasterise = FALSE,
    legend_size = 2,
    legend_ncol = 1
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    )

purrr::reduce(
    list(
        p_embedding_leiden,
        p_embedding_UMI,
        p_embedding_MT,
        p_embedding_POU5F1
    ), `+`
) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Summary

embedding_2067 |>
    dplyr::left_join(
        cell_metadata
    ) |>
    dplyr::mutate(
        num_umis = Matrix::colSums(matrix_readcount_use[, cell]),
        num_genes = Matrix::colSums(matrix_readcount_use[, cell] > 0),
    ) |>
    dplyr::group_by(leiden) |>
    dplyr::summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes),
        meidan_MT = median(mt_percentage)
    ) |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median")

leiden	num_cells	median_umis	median_genes	meidan_MT
0	205	2140756	5387.0	0.0529720842
1	182	2311908	5423.0	0.0404300979
2	173	1631691	4577.0	0.0488606864
3	156	2319072	6379.5	0.0413304999
4	152	1390714	3764.5	0.0543443623
5	149	855797	3103.0	0.0364863153
6	126	2646474	6842.5	0.0501413970
7	124	2202163	6414.5	0.0413531842
8	119	1673032	4618.0	0.0522041437
9	115	1849883	5760.0	0.0506782652
10	115	1738237	3665.0	0.0002466655
11	114	1201448	3621.0	0.0016033615
12	109	1327329	3660.0	0.0361322014
13	103	1507763	4685.0	0.0376887867
14	79	1519467	4506.0	0.0003062544
15	46	1677120	4610.0	0.0469316175

embedding_2067 |>
    dplyr::left_join(
        cell_metadata,
    ) |>
    dplyr::left_join(
        cell_metadata_PRJNA737139
    ) |>
    dplyr::mutate(
        num_umis = Matrix::colSums(matrix_readcount_use[, cell]),
        num_genes = Matrix::colSums(matrix_readcount_use[, cell] > 0),
    ) |>
    dplyr::group_by(lineage) |>
    dplyr::summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes),
        meidan_MT = median(mt_percentage)
    ) |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median")

lineage	num_cells	median_umis	median_genes	meidan_MT
Naive H9	143	2322872	6187.0	0.03970047
Primed H9	124	2202163	6414.5	0.04135318
TSCs	128	2642517	6831.0	0.05028367
Blastoid_24h	223	2036212	5169.0	0.04553170
Blastoid_60h	460	1964516	4855.5	0.03979052
Blastoid_96h	989	1465353	4264.0	0.04480325

Cell type

plot_embedding_highlight(
    embedding = embedding_2067,
    x = cell_metadata_PRJNA737139,
    y = "lineage",
    label = "Kagawa et al. 2021",
    n_cols = 2
)

Expression

FEATURES_SELECTED <- c(
    "ENSG00000171872_KLF17",
    "ENSG00000204531_POU5F1",
    "ENSG00000111704_NANOG",
    "ENSG00000186103_ARGFX",
    #
    "ENSG00000164736_SOX17",
    "ENSG00000125798_FOXA2",
    "ENSG00000136574_GATA4",
    "ENSG00000134853_PDGFRA",
    #
    "ENSG00000179348_GATA2",
    "ENSG00000070915_SLC12A3",
    "ENSG00000165556_CDX2",
    "ENSG00000007866_TEAD3"
)

purrr::map(FEATURES_SELECTED, \(x) {
    selected_feature <- x

    cat(selected_feature, "\n")
    values <- log10(calc_cpm(matrix_readcount_use[, embedding_2067$cell])[selected_feature, ] + 1)

    plot_embedding(
        embedding = embedding_2067[, c(x_column, y_column)],
        color_values = values,
        label = paste(
            EMBEDDING_TITLE_PREFIX,
            selected_feature |> stringr::str_remove(pattern = "^E.+_"),
            sep = "; "
        ),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = TRUE,
        geom_point_size = GEOM_POINT_SIZE,
        sort_values = TRUE,
        shuffle_values = FALSE,
        rasterise = RASTERISED,
        legend_size = 2
    ) +
        scale_color_viridis_c(
            na.value = "grey80"
        ) +
        theme_customized(
            legend_key_size = 2,
            legend_text_size = 5
        )
}) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3, byrow = FALSE) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

## ENSG00000171872_KLF17 
## ENSG00000204531_POU5F1 
## ENSG00000111704_NANOG 
## ENSG00000186103_ARGFX 
## ENSG00000164736_SOX17 
## ENSG00000125798_FOXA2 
## ENSG00000136574_GATA4 
## ENSG00000134853_PDGFRA 
## ENSG00000179348_GATA2 
## ENSG00000070915_SLC12A3 
## ENSG00000165556_CDX2 
## ENSG00000007866_TEAD3

Author-defined clusters

Visualize author-defined clusters on this embedding (Low resolution; Fig. 2b). Note: 1672 out of total 2311 GEO-deposited blastoid cells (72.3%) are used in Fig. 2a-d.

GEO-deposited cell metadata.

cell_metadata_PRJNA737139_author |>
    dplyr::count(treatment, name = "num_cells") |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median") |>
    # gt::grand_summary_rows(
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    )

	treatment	num_cells
	2D_TSCM	128
	E8	125
	PALLY	248
	PALLY_48_hours_LY_12_hours	744
	PALLY_48_hours_LY_48_hours	1319
	PXGL	151
Sum	—	2,715

Cells included in Fig. 2a.

cell_metadata_PRJNA737139_author |>
    dplyr::filter(
        !is.na(blastoid.Fig2b.lowres)
    ) |>
    dplyr::count(treatment, name = "num_cells") |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median") |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    )

	treatment	num_cells
	2D_TSCM	128
	E8	124
	PALLY	223
	PALLY_48_hours_LY_12_hours	460
	PALLY_48_hours_LY_48_hours	989
	PXGL	143
Sum	—	2,067

Match the names used in Fig. 2a.

cell_metadata_PRJNA737139 |>
    dplyr::filter(
        !is.na(blastoid.Fig2b.lowres)
    ) |>
    dplyr::count(lineage, name = "num_cells") |>
    dplyr::rename(group = lineage) |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median") |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    )

	group	num_cells
	Naive H9	143
	Primed H9	124
	TSCs	128
	Blastoid_24h	223
	Blastoid_60h	460
	Blastoid_96h	989
Sum	—	2,067

Low resolution

Visualize author-defined clusters on this embedding (Low resolution; Fig. 2b).

plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column)],
    color_values = embedding_2067 |>
        dplyr::left_join(
            cell_metadata_PRJNA737139
        ) |>
        dplyr::pull(blastoid.Fig2b.lowres)
        |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Fig. 2b clusters"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = FALSE,
    shuffle_values = FALSE,
    rasterise = RASTERISED
) +
    theme_customized()

calc_group_composition(
    data = embedding_2067 |>
        dplyr::left_join(
            cell_metadata_PRJNA737139
        ),
    x = "blastoid.Fig2b.lowres",
    group = "lineage"
) |>
    dplyr::mutate(
        blastoid.Fig2b.lowres = as.factor(blastoid.Fig2b.lowres)
    ) |>
    plot_barplot(
        x = "blastoid.Fig2b.lowres",
        y = "percentage",
        z = "lineage",
        legend_ncol = 1
    )

Visualize selected four non-blastocyst genes on this embedding (Extended Data Fig. 3b).

FEATURES_SELECTED <- c(
    "ENSG00000094755_GABRP",
    "ENSG00000134817_APLNR",
    "ENSG00000016082_ISL1",
    "ENSG00000143320_CRABP2"
)

purrr::map(FEATURES_SELECTED, \(x) {
    selected_feature <- x

    cat(selected_feature, "\n")
    values <- log10(calc_cpm(matrix_readcount_use[, embedding_2067$cell])[selected_feature, ] + 1)

    plot_embedding(
        embedding = embedding_2067[, c(x_column, y_column)],
        color_values = values,
        label = paste(
            EMBEDDING_TITLE_PREFIX,
            selected_feature |> stringr::str_remove(pattern = "^E.+_"),
            sep = "; "
        ),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = TRUE,
        geom_point_size = GEOM_POINT_SIZE * 1.5,
        sort_values = TRUE,
        shuffle_values = FALSE,
        rasterise = RASTERISED,
        legend_size = 2
    ) +
        scale_color_viridis_c(
            na.value = "grey80"
        ) +
        theme_customized(
            legend_key_size = 2,
            legend_text_size = 5
        )
}) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2, byrow = FALSE) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

## ENSG00000094755_GABRP 
## ENSG00000134817_APLNR 
## ENSG00000016082_ISL1 
## ENSG00000143320_CRABP2

High resolution

Visualize author-defined clusters on this embedding (High resolution; Extended Data Fig. 3a).

plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column)],
    color_values = embedding_2067 |>
        dplyr::left_join(
            cell_metadata_PRJNA737139
        ) |>
        dplyr::pull(blastoid.FigS3a.highres)
        |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Fig. S3a clusters"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = FALSE,
    shuffle_values = FALSE,
    rasterise = RASTERISED
) +
    theme_customized()

p_barplot_highres_1 <- calc_group_composition(
    data = embedding_2067 |>
        dplyr::left_join(
            cell_metadata_PRJNA737139
        ),
    x = "blastoid.FigS3a.highres",
    group = "lineage"
) |>
    dplyr::mutate(
        lineage = stringr::str_remove(string = lineage, pattern = "_.+$"),
        lineage = as.factor(
            lineage
        ),
        blastoid.FigS3a.highres = as.factor(blastoid.FigS3a.highres)
    ) |>
    plot_barplot(
        x = "blastoid.FigS3a.highres",
        y = "percentage",
        z = "lineage",
        legend_ncol = 1
    )

p_barplot_highres_2 <- calc_group_composition(
    data = embedding_2067 |>
        dplyr::left_join(
            cell_metadata_PRJNA737139
        ),
    x = "blastoid.FigS3a.highres",
    group = "lineage"
) |>
    dplyr::mutate(
        blastoid.FigS3a.highres = as.factor(blastoid.FigS3a.highres)
    ) |>
    plot_barplot(
        x = "blastoid.FigS3a.highres",
        y = "percentage",
        z = "lineage",
        legend_ncol = 1
    )

list(p_barplot_highres_1, p_barplot_highres_2) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 1, byrow = FALSE, guides = "collect") +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Subset of blastoid cells

According to the cell metadata deposited at GEO, only 96h-blastoid cells were included for comparison with other datasets.

cell_metadata_PRJNA737139_author |>
    dplyr::filter(
        !is.na(blastoid.FigS2g.PZT.integration)
    ) |>
    dplyr::count(treatment, name = "num_cells") |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median") |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    )

	treatment	num_cells
	E8	124
	PALLY_48_hours_LY_48_hours	989
	PXGL	143
Sum	—	1,256

Match the names used in Fig. 2a.

cell_metadata_PRJNA737139 |>
    dplyr::filter(
        !is.na(blastoid.FigS2g.PZT.integration)
    ) |>
    dplyr::count(lineage, name = "num_cells") |>
    dplyr::rename(group = lineage) |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median") |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    )

	group	num_cells
	Naive H9	143
	Primed H9	124
	Blastoid_96h	989
Sum	—	1,256

Highlight blastoid cells used in Fig. 2e-f.

plot_embedding(
    embedding = embedding_2067[, c(x_column, y_column, "cell")] |>
        dplyr::left_join(cell_metadata_PRJNA737139),
    color_values = embedding_2067[, c(x_column, y_column, "cell")] |>
        dplyr::left_join(cell_metadata_PRJNA737139) |>
        dplyr::mutate(
            value = case_when(
                (
                    stringr::str_detect(string = lineage, pattern = "Blast") & is.na(blastoid.FigS2g.PZT.integration)
                ) ~ "1",
                TRUE ~ "0"
            )
        ) |>
        dplyr::pull(value) |>
        as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; 96h blastoid cells"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = FALSE,
    shuffle_values = FALSE,
    rasterise = RASTERISED
) +
    theme_customized() +
    ggplot2::scale_color_manual(
        values = c("grey70", "salmon")
    )

Integration with other datasets

Instead of only including a subset of each dataset, this embedding includes all the cells mentioned in their respective publications to cover the full spectrum of cellular diversity.

Included cells and their lineage annotations are the same as in their respective publications. See here for details.

EMBEDDING_FILE <- glue::glue(
    "embedding_ncomponents18_ccc1_seed20210719.csv.gz"
)

embedding <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw",
        "public",
        "PRJNA737139",
        "clustering",
        "PRJNA737139_PRJEB11202_PRJNA431392_PRJEB40781",
        "exploring",
        "Scanpy_Harmony_variable",
        EMBEDDING_FILE
    )
)

embedding |> head()

studies <- tibble::tribble(
    ~bioproject, ~citation,
    "PRJEB11202", "Petropoulos et al. 2016",
    "PRJEB40781", "Tyser et al. 2021",
    "PRJNA431392", "Zhou et al. 2019",
    "PRJNA737139", "Kagawa et al. 2021",
    "PRJNA632839", "Yu et al. 2021",
    "PRJNA658478", "Liu et al. 2021",
    "PRJNA720968", "Yanagida et al. 2021",
    "PRJNA667174", "Fan et al. 2021",
    "PRJNA738498", "Sozen et al. 2021",
    "PRJNA737139", "Kagawa et al. 2021"
)

studies <- setNames(
    object = studies$citation,
    nm = studies$bioproject
)

embedding |>
    dplyr::count(batch, name = "num_cells") |>
    dplyr::mutate(
        study = studies[batch]
    ) |>
    gt::gt() |>
    gt::tab_options(table.font.size = "median") |>
    gt::summary_rows(
        columns = c(num_cells),
        fns = list(
            Sum = ~ sum(.)
        ),
        decimals = 0
    )

	batch	num_cells	study
	PRJEB11202	1529	Petropoulos et al. 2016
	PRJEB40781	1195	Tyser et al. 2021
	PRJNA431392	5911	Zhou et al. 2019
	PRJNA737139	2067	Kagawa et al. 2021
Sum	—	10,702	—

Metadata

# PRJNA431392; Zhou et al. 2019
embryos_selected_PRJNA431392 <- c(
    "ha_D6_E2",
    "hm_D6_E1",
    "hm_D6_E2",
    "hm_D8_E2",
    "hm_D8_E3",
    "hm_D8_E5",
    "ha_D8_E1",
    "hm_D8_E1",
    "hv_D8_E1",
    "hv_D8_E2",
    "hv_D8_E3",
    "hv_D10_E6",
    "ha_D10_E1",
    "ha_D10_E2",
    "hm_D10_E4",
    "hm_D10_E9",
    "hv_D10_E7",
    "hv_D10_E8",
    "ha_D12_E1",
    "hv_D12_E1",
    "hv_D12_E2"
)

cell_metadata_PRJNA431392 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw",
        "public",
        "PRJNA431392",
        "matrix/cell_metadata.csv"
    )
) |>
    dplyr::mutate(
        developmental_stage = factor(
            Day,
            levels = stringr::str_sort(
                unique(Day),
                numeric = TRUE
            )
        ),
        lineage = factor(
            Lineage,
            levels = c(
                "EPI",
                "PE",
                "TE",
                "MIX"
            )
        ),
        `3184` = case_when(
            Ori_Day_Emb %in% embryos_selected_PRJNA431392 ~ "1",
            TRUE ~ "0"
        )
    ) |>
    dplyr::rename(cell = Sample)


# PRJEB40781; Tyser et al. 2021
cell_metadata_PRJEB40781 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw",
        "public",
        "PRJEB40781",
        "matrix/cell_metadata.csv"
    )
) |>
    dplyr::mutate(
        lineage = factor(lineage),
        lineage_ontology = factor(lineage_ontology),
        sampling_site = factor(sampling_site)
    )


# PRJEB11202; Petropoulos et al. 2016
cell_metadata_PRJEB11202 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw",
        "public",
        "PRJEB11202",
        "matrix/cell_metadata.csv"
    )
) |>
    dplyr::mutate(
        developmental_stage = stringr::str_remove(
            string = individual,
            pattern = "\\..+$"
        ),
        developmental_stage = factor(
            developmental_stage,
            levels = stringr::str_sort(
                unique(developmental_stage),
                numeric = TRUE
            )
        ),
        #
        lineage = factor(
            inferred_lineage,
            levels = c(
                "epiblast",
                "primitive_endoderm",
                "trophectoderm",
                "not_applicable"
            )
        )
    ) |>
    dplyr::select(
        cell, lineage, developmental_stage
    )

Visualization

Clustering

p_embedding_leiden <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$leiden |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Leiden"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = FALSE,
    shuffle_values = FALSE,
    rasterise = RASTERISED
) +
    theme_customized()

p_embedding_UMI <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = log10(Matrix::colSums(matrix_readcount_use[, embedding$cell])),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; UMI"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE * 1,
    geom_point_alpha = 1,
    sort_values = TRUE,
    shuffle_values = FALSE,
    label_size = 2.5,
    label_hjust = 0,
    label_vjust = 0,
    rasterise = RASTERISED,
    legend_size = 2,
    legend_ncol = 1
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    )

p_embedding_MT <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(
            cell_metadata # |> dplyr::select(-batch)
        ) |>
        dplyr::pull(mt_percentage),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; MT %"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE * 1,
    geom_point_alpha = 1,
    sort_values = TRUE,
    shuffle_values = FALSE,
    label_size = 2.5,
    label_hjust = 0,
    label_vjust = 0,
    rasterise = RASTERISED,
    legend_size = 2,
    legend_ncol = 1
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    )

p_embedding_batch <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$batch |> as.factor(),
    label = glue::glue("{EMBEDDING_TITLE_PREFIX}; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE / 2,
    sort_values = FALSE,
    shuffle_values = TRUE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        values = scales::hue_pal()(n = length(unique(embedding$batch))),
        labels = studies
    )

purrr::reduce(
    list(
        p_embedding_leiden,
        p_embedding_batch,
        p_embedding_UMI,
        p_embedding_MT
    ), `+`
) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

purrr::map(unique(embedding$batch), \(x) {
    plot_embedding(
        embedding = embedding[, c(x_column, y_column)],
        color_values = as.integer(embedding$batch == x) |> as.factor(),
        label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {studies[x]}, {sum(as.integer(embedding$batch == x), na.rm = TRUE)}"),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = FALSE,
        geom_point_size = GEOM_POINT_SIZE,
        sort_values = TRUE,
        shuffle_values = FALSE,
        rasterise = RASTERISED,
        legend_size = 2
    ) +
        theme_customized(
            legend_key_size = 2,
            legend_text_size = 5
        ) +
        scale_color_manual(
            values = c("grey70", "salmon")
        )
}) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Cell annotation

Petropoulos et al. 2016

bioproject <- "PRJEB11202"
cell_metadata_selected <- cell_metadata_PRJEB11202

selected_column <- "developmental_stage"
p_embedding_developmental_stage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

list(
    p_embedding_lineage,
    p_embedding_developmental_stage
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Salmon: highlighted group of cells; Light grey: cells belonging to this dataset but not the highlighted group; Dark grey: cells belonging to other datasets.

plot_embedding_highlight(
    embedding = embedding,
    x = cell_metadata_selected,
    y = "lineage",
    label = studies[bioproject],
    n_cols = 2
)

plot_embedding_highlight(
    embedding = embedding,
    x = cell_metadata_selected,
    y = "developmental_stage",
    label = studies[bioproject],
    n_cols = 2
)

Zhou et al. 2019

bioproject <- "PRJNA431392"
cell_metadata_selected <- cell_metadata_PRJNA431392
selected_column <- "developmental_stage"

p_embedding_developmental_stage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

selected_column <- "3184"
p_embedding_chosen <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = c("grey70", "salmon"),
        na.value = "#7F7F7F"
    )

list(
    p_embedding_lineage,
    p_embedding_developmental_stage,
    p_embedding_chosen
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

plot_embedding_highlight(
    embedding = embedding,
    x = cell_metadata_selected,
    y = "developmental_stage",
    label = studies[bioproject],
    n_cols = 2
)

Tyser et al. 2021

bioproject <- "PRJEB40781"
cell_metadata_selected <- cell_metadata_PRJEB40781

selected_column <- "sampling_site"
p_embedding_sampling_site <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "grey70"
    )

selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

list(
    p_embedding_lineage,
    p_embedding_sampling_site
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

plot_embedding_highlight(
    embedding = embedding,
    x = cell_metadata_selected,
    y = "lineage",
    label = studies[bioproject],
    n_cols = 2
)

Kagawa et al. 2021

bioproject <- "PRJNA737139"
cell_metadata_selected <- cell_metadata_PRJNA737139

selected_column <- "source_name"
p_embedding_source <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

selected_column <- "lineage"
p_embedding_lineage <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding |>
        dplyr::left_join(cell_metadata_selected) |>
        dplyr::pull(.data[[selected_column]]),
    label = glue::glue(
        "{EMBEDDING_TITLE_PREFIX}; {studies[bioproject]}; ",
        "{selected_column |>
        stringr::str_to_title() |>
        stringr::str_replace(pattern = \"_\", replacement = \" \")}"
    ),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = GEOM_POINT_SIZE,
    sort_values = TRUE,
    shuffle_values = FALSE,
    rasterise = RASTERISED,
    legend_size = 2
) +
    theme_customized(
        legend_key_size = 2,
        legend_text_size = 5
    ) +
    scale_color_manual(
        na.translate = TRUE,
        values = scales::hue_pal()(n = length(unique(cell_metadata_selected[[selected_column]]))),
        na.value = "#7F7F7F"
    )

list(
    p_embedding_source,
    p_embedding_lineage
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

plot_embedding_highlight(
    embedding = embedding,
    x = cell_metadata_selected,
    y = "lineage",
    label = studies[bioproject],
    n_cols = 2
)

R session info

devtools::session_info()$platform

##  setting  value
##  version  R version 4.1.2 (2021-11-01)
##  os       macOS Monterey 12.1
##  system   aarch64, darwin20.6.0
##  ui       unknown
##  language (EN)
##  collate  en_US.UTF-8
##  ctype    en_US.UTF-8
##  tz       America/Chicago
##  date     2022-01-18
##  pandoc   2.14.0.3 @ /Applications/RStudio.app/Contents/MacOS/pandoc/ (via rmarkdown)

devtools::session_info()$pack |>
    as_tibble() |>
    dplyr::select(
        package,
        loadedversion,
        date,
        `source`
    ) |>
    # print(n = nrow(.))
    gt::gt() |>
    gt::tab_options(table.font.size = "median")

package	loadedversion	date	source
assertthat	0.2.1	2019-03-21	CRAN (R 4.1.1)
backports	1.4.1	2021-12-13	CRAN (R 4.1.2)
beeswarm	0.4.0	2021-06-01	CRAN (R 4.1.2)
bit	4.0.4	2020-08-04	CRAN (R 4.1.1)
bit64	4.0.5	2020-08-30	CRAN (R 4.1.1)
brio	1.1.3	2021-11-30	CRAN (R 4.1.2)
broom	0.7.11	2022-01-03	CRAN (R 4.1.2)
bslib	0.3.1	2021-10-06	CRAN (R 4.1.1)
cachem	1.0.6	2021-08-19	CRAN (R 4.1.1)
callr	3.7.0	2021-04-20	CRAN (R 4.1.1)
cellranger	1.1.0	2016-07-27	CRAN (R 4.1.1)
checkmate	2.0.0	2020-02-06	CRAN (R 4.1.1)
cli	3.1.0	2021-10-27	CRAN (R 4.1.1)
colorspace	2.0-2	2021-06-24	CRAN (R 4.1.1)
crayon	1.4.2	2021-10-29	CRAN (R 4.1.1)
curl	4.3.2	2021-06-23	CRAN (R 4.1.1)
data.table	1.14.2	2021-09-27	CRAN (R 4.1.1)
DBI	1.1.2	2021-12-20	CRAN (R 4.1.2)
dbplyr	2.1.1	2021-04-06	CRAN (R 4.1.1)
desc	1.4.0	2021-09-28	CRAN (R 4.1.1)
devtools	2.4.3.9000	2022-01-15	Github (r-lib/devtools@e2f25cd69031c8d2099106baed894df4109cb7a4)
digest	0.6.29	2021-12-01	CRAN (R 4.1.2)
dplyr	1.0.7.9000	2022-01-12	Github (tidyverse/dplyr@05013358ace44fe17a51395d49d384232d18d6c1)
dtplyr	1.2.0	2021-12-05	CRAN (R 4.1.2)
ellipsis	0.3.2	2021-04-29	CRAN (R 4.1.1)
evaluate	0.14	2019-05-28	CRAN (R 4.1.1)
extrafont	0.17	2014-12-08	CRAN (R 4.1.1)
extrafontdb	1.0	2012-06-11	CRAN (R 4.1.1)
fansi	1.0.2	2022-01-14	CRAN (R 4.1.2)
farver	2.1.0	2021-02-28	CRAN (R 4.1.1)
fastmap	1.1.0	2021-01-25	CRAN (R 4.1.1)
forcats	0.5.1.9000	2021-11-29	Github (tidyverse/forcats@b4dade0636a46543c30b0b647d97c3ce697c0ada)
fs	1.5.2.9000	2021-12-09	Github (r-lib/fs@6d1182fea7e1c1ddbef3b0bba37c0b0a2e09749c)
gargle	1.2.0	2021-07-02	CRAN (R 4.1.1)
generics	0.1.1	2021-10-25	CRAN (R 4.1.1)
ggbeeswarm	0.6.0	2017-08-07	CRAN (R 4.1.2)
ggplot2	3.3.5	2021-06-25	CRAN (R 4.1.1)
ggrastr	1.0.1	2021-12-08	Github (VPetukhov/ggrastr@7aed9af2b9cffabda86e6d2af2fa10d4e60cc63d)
glue	1.6.0.9000	2021-12-21	Github (tidyverse/glue@76793ef2c376140350c0e1909e66fd404a52b1ef)
googledrive	2.0.0	2021-07-08	CRAN (R 4.1.1)
googlesheets4	1.0.0	2021-07-21	CRAN (R 4.1.1)
gt	0.3.1.9000	2022-01-17	Github (rstudio/gt@fcabb414c55b70c9e445fbedfb24d52fe394ba61)
gtable	0.3.0.9000	2021-10-28	Github (r-lib/gtable@a0bd2721a0a31c8b4391b84aabe98f8c85881140)
haven	2.4.3	2021-08-04	CRAN (R 4.1.1)
highr	0.9	2021-04-16	CRAN (R 4.1.1)
hms	1.1.1	2021-09-26	CRAN (R 4.1.1)
htmltools	0.5.2	2021-08-25	CRAN (R 4.1.1)
httr	1.4.2	2020-07-20	CRAN (R 4.1.1)
jquerylib	0.1.4	2021-04-26	CRAN (R 4.1.1)
jsonlite	1.7.3	2022-01-17	CRAN (R 4.1.2)
knitr	1.37.1	2021-12-21	https://yihui.r-universe.dev (R 4.1.2)
labeling	0.4.2	2020-10-20	CRAN (R 4.1.1)
lattice	0.20-45	2021-09-22	CRAN (R 4.1.2)
lifecycle	1.0.1	2021-09-24	CRAN (R 4.1.1)
lubridate	1.8.0	2022-01-15	Github (tidyverse/lubridate@53e5892a548b3425d6c3bf887542aa105341ab73)
magrittr	2.0.1	2020-11-17	CRAN (R 4.1.1)
Matrix	1.4-0	2021-12-08	CRAN (R 4.1.2)
memoise	2.0.1	2021-11-26	CRAN (R 4.1.2)
modelr	0.1.8.9000	2021-10-27	Github (tidyverse/modelr@16168e0624215d9d1a008f3a85de30aeb75302f6)
munsell	0.5.0	2018-06-12	CRAN (R 4.1.1)
patchwork	1.1.0.9000	2021-10-27	Github (thomasp85/patchwork@79223d3002e7bd7e715a270685c6507d684b2622)
pillar	1.6.4	2021-10-18	CRAN (R 4.1.1)
pkgbuild	1.3.1	2021-12-20	CRAN (R 4.1.2)
pkgconfig	2.0.3	2019-09-22	CRAN (R 4.1.1)
pkgload	1.2.4	2021-11-30	CRAN (R 4.1.2)
png	0.1-7	2013-12-03	CRAN (R 4.1.1)
prettyunits	1.1.1	2020-01-24	CRAN (R 4.1.1)
processx	3.5.2	2021-04-30	CRAN (R 4.1.1)
ps	1.6.0	2021-02-28	CRAN (R 4.1.1)
purrr	0.3.4	2020-04-17	CRAN (R 4.1.1)
R.cache	0.15.0	2021-04-30	CRAN (R 4.1.1)
R.methodsS3	1.8.1	2020-08-26	CRAN (R 4.1.1)
R.oo	1.24.0	2020-08-26	CRAN (R 4.1.1)
R.utils	2.11.0	2021-09-26	CRAN (R 4.1.1)
R6	2.5.1.9000	2021-12-09	Github (r-lib/R6@1b05b89f30fe6713cb9ff51d91fc56bd3016e4b2)
ragg	1.2.1.9000	2021-12-08	Github (r-lib/ragg@c68c6665ef894f16c006333658b32bf25d2e9d19)
Rcpp	1.0.8	2022-01-13	CRAN (R 4.1.2)
readr	2.1.1	2021-11-30	CRAN (R 4.1.2)
readxl	1.3.1.9000	2022-01-06	Github (tidyverse/readxl@1059a768436df38fb7b33f8e0415c18585c11ac5)
remotes	2.4.2	2021-12-02	Github (r-lib/remotes@fcad17b68b7a19d5363d64adfb0a0426a3a5b3db)
reprex	2.0.1	2021-08-05	CRAN (R 4.1.1)
reticulate	1.23	2022-01-14	CRAN (R 4.1.2)
rlang	0.99.0.9003	2022-01-17	Github (r-lib/rlang@a165f49e841dc848fb01133dd265a83c5a671013)
rmarkdown	2.11.8	2022-01-15	Github (rstudio/rmarkdown@52a65394a6e1ab84b10feccf893e2a16ee25aeb3)
rprojroot	2.0.2	2020-11-15	CRAN (R 4.1.1)
rstudioapi	0.13.0-9000	2022-01-15	Github (rstudio/rstudioapi@5d0f0873dc160779c71bf4b00d8b016b898f6fb5)
Rttf2pt1	1.3.9	2021-07-22	CRAN (R 4.1.1)
rvest	1.0.2	2021-10-16	CRAN (R 4.1.1)
sass	0.4.0	2021-05-12	CRAN (R 4.1.1)
scales	1.1.1	2020-05-11	CRAN (R 4.1.1)
sessioninfo	1.2.2	2021-12-06	CRAN (R 4.1.2)
stringi	1.7.6	2021-11-29	CRAN (R 4.1.2)
stringr	1.4.0.9000	2022-01-17	Github (tidyverse/stringr@3848cd70b1e331e6c20401e4da518ff4c3725324)
styler	1.6.2.9000	2022-01-17	Github (r-lib/styler@9274aed613282eca01909ae8c341224055d9c928)
systemfonts	1.0.3.9000	2021-12-07	Github (r-lib/systemfonts@414114e645efb316def3d8de1056d855f92d588e)
testthat	3.1.1.9000	2022-01-13	Github (r-lib/testthat@f09df60dd881530332b252474e9f35c97f8640be)
textshaping	0.3.6	2021-10-13	CRAN (R 4.1.1)
tibble	3.1.6.9000	2021-12-30	Github (tidyverse/tibble@2d8d3fddda2b879a82cb58086f2107da4f04befa)
tidyr	1.1.4	2021-09-27	CRAN (R 4.1.1)
tidyselect	1.1.1	2021-04-30	CRAN (R 4.1.1)
tidyverse	1.3.1.9000	2021-12-08	Github (tidyverse/tidyverse@6186fbf09bf359110f8800ff989cbbdd40485eb0)
tzdb	0.2.0	2021-10-27	CRAN (R 4.1.1)
usethis	2.1.5.9000	2022-01-17	Github (r-lib/usethis@16d14b148cca803e15be4b33a7453a69e8d6fc29)
utf8	1.2.2	2021-07-24	CRAN (R 4.1.1)
vctrs	0.3.8	2021-04-29	CRAN (R 4.1.1)
vipor	0.4.5	2017-03-22	CRAN (R 4.1.2)
viridisLite	0.4.0	2021-04-13	CRAN (R 4.1.1)
vroom	1.5.7	2021-11-30	CRAN (R 4.1.2)
withr	2.4.3	2021-11-30	CRAN (R 4.1.2)
xfun	0.29	2021-12-14	CRAN (R 4.1.2)
xml2	1.3.3	2021-11-30	CRAN (R 4.1.2)
yaml	2.2.1	2020-02-01	CRAN (R 4.1.1)

Single-cell transcriptomes of human blastoids generated by Kagawa et al., 2021

Jialei Duan

2022-01-18

Data preparation

Functions loading

Data loading

Matrix

Metadata

Clustering with PSCs and TSCs

Cell metadata of Kagawa et al. 2021

Visualization

Embedding

Clustering

Cell type

Expression

Author-defined clusters

Low resolution

High resolution

Subset of blastoid cells

Integration with other datasets

Metadata

Visualization

Clustering

Cell annotation

Petropoulos et al. 2016

Zhou et al. 2019

Tyser et al. 2021

Kagawa et al. 2021

R session info