Lv, B., An, Q., Zeng, Q., Zhang, X., Lu, P., Wang, Y., Zhu, X., Ji, Y., Fan, G., and Xue, Z. (2019). Single-cell RNA sequencing reveals regulatory mechanism for trophoblast cell-fate divergence in human peri-implantation conceptuses. PLoS Biol. 17, e3000187.

  • BioProject Accession: PRJNA516921
  • GEO Accession: GSE125616



Load required packages.

library(tidyverse)
library(magrittr)
library(Matrix)
library(extrafont)
library(ggrepel)
library(patchwork)
# library(tidylog)
Sys.time()
## [1] "2020-08-12 18:38:11 CDT"

Data preparation

SEED_USE <- 20200616
MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE <- 1

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

Data loading

Prepare metadata for single cells.

# https://www.ncbi.nlm.nih.gov/geo/browse/?view=samples&series=125616
geo_info <- read_csv(
    file = "sample.csv"
)
## Parsed with column specification:
## cols(
##   Accession = col_character(),
##   Title = col_character(),
##   `Sample Type` = col_character(),
##   Taxonomy = col_character(),
##   Channels = col_double(),
##   Platform = col_character(),
##   Series = col_character(),
##   `Supplementary Types` = col_character(),
##   `Supplementary Links` = col_character(),
##   `SRA Accession` = col_character(),
##   Contact = col_character(),
##   `Release Date` = col_character()
## )
cell_metadata <- read_delim(
    file = "../SraRunTable.txt",
    delim = ","
) %>%
    rename_all(. %>% tolower()) %>%
    `colnames<-`(str_replace(
        string = colnames(.),
        pattern = " ", replacement = "_"
    )) %>%
    select(
        sample_name,
        run,
        biosample,
        development_day,
        source_name,
        stage
    ) %>%
    left_join(
        geo_info %>%
            select(Accession, Title),
        by = c("sample_name" = "Accession")
    ) %>%
    rename_all(. %>% tolower()) %>%
    dplyr::rename(
        developmental_stage = "development_day",
    ) %>%
    mutate(
        developmental_stage = str_replace(
            string = developmental_stage,
            pattern = "day",
            replacement = "Day "
        ),
        developmental_stage = factor(
            developmental_stage,
            levels = developmental_stage %>%
                unique() %>%
                str_sort(numeric = TRUE)
        ),
        stage = str_to_title(string = stage),
        stage = factor(
            stage,
            levels = c(
                "Blastocyst", "Implantation", "Post-Implantation", "Endometrial"
            )
        )
    )
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   AvgSpotLen = col_double(),
##   Bases = col_double(),
##   Bytes = col_double(),
##   ReleaseDate = col_datetime(format = "")
## )
## See spec(...) for full column specifications.
cell_metadata
embedding <- read_csv(
    file = "../exploring/embedding_ncomponents14_seed20200317.csv"
) %>%
    left_join(
        cell_metadata,
        by = c("cell" = "sample_name")
    ) %>%
    select(
        cell:y_umap, run:last_col()
    )
## Parsed with column specification:
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
reticulate::py_discover_config()
## python:         /Users/jialei/.pyenv/shims/python
## libpython:      /Users/jialei/.pyenv/versions/3.8.2/lib/libpython3.8.dylib
## pythonhome:     /Users/jialei/.pyenv/versions/3.8.2:/Users/jialei/.pyenv/versions/3.8.2
## version:        3.8.2 (default, May 23 2020, 03:35:41)  [Clang 11.0.3 (clang-1103.0.32.62)]
## numpy:          /Users/jialei/.pyenv/versions/3.8.2/lib/python3.8/site-packages/numpy
## numpy_version:  1.19.0
## 
## NOTE: Python version was forced by RETICULATE_PYTHON
np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)

matrix_readcount_use <- scipy.sparse$load_npz("../matrix_readcount.npz")
matrix_readcount_use_features <- np$load("../matrix_readcount_features.npy")
matrix_readcount_use_barcodes <- np$load("../matrix_readcount_barcodes.npy")
colnames(matrix_readcount_use) <- matrix_readcount_use_barcodes
rownames(matrix_readcount_use) <- matrix_readcount_use_features
matrix_readcount_use <- matrix_readcount_use[, embedding$cell]

# calculate CPM
matrix_cpm_use <- calc_cpm(matrix_readcount_use)

stopifnot(
    dim(matrix_readcount_use) == dim(matrix_cpm_use)
)
print(dim(matrix_readcount_use))
## [1] 33538   640
walk(list(embedding, matrix_readcount_use, matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 265.4 kB
## 84.3 MB
## 84.3 MB

Lineage determination

Cell selection

p_embedding_cluster <- plot_embedding(
    embedding = embedding[, c("x_tsne", "y_tsne")],
    color_values = embedding$louvain %>% as.factor(),
    label = "t-SNE; Cluster",
    # label_position = c(label_x, label_y),
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 10")(
            length(unique(embedding$louvain))
        )
    )

p_embedding_UMIs <- plot_embedding_value(
    embedding = embedding[, c("x_tsne", "y_tsne")],
    color_values = colSums(matrix_readcount_use[, embedding$cell]),
    colorbar_position = c(0.85, 0.25),
    label = str_c("t-SNE; UMIs"),
    label_position = NULL,
    # label_position = c(label_x, label_y),
    geom_point_size = 1,
    sort_values = TRUE,
    FUN = NULL
)

p_embedding_developmental_stage <- plot_embedding(
    embedding = embedding[, c("x_tsne", "y_tsne")],
    color_values = embedding$developmental_stage,
    label = "t-SNE; Developmental stage",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(levels(embedding$developmental_stage))
        )
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme(x = 0.7, y = 0.55)

p_embedding_stage <- plot_embedding(
    embedding = embedding[, c("x_tsne", "y_tsne")],
    color_values = embedding$stage,
    label = "t-SNE; Stage",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme()
cell_metadata %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(developmental_stage) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(developmental_stage) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

developmental_stage num_cells num_umis num_genes
Day 6 42 982971.0 9208.5
Day 7 129 1409906.0 10455.0
Day 8 184 1186773.0 11199.0
Day 9 145 713436.0 10945.0
Day 10 108 805837.5 10452.5
Day 13 1 126406.0 17492.0
Day 14 6 310928.0 11563.5
Endometrial 25 1657972.0 9590.0

cell_metadata %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(stage) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(stage) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

stage num_cells num_umis num_genes
Blastocyst 42 982971 9208.5
Implantation 313 1253483 10851.0
Post-Implantation 260 724805 10769.0
Endometrial 25 1657972 9590.0

purrr::reduce(list(
    p_embedding_cluster,
    p_embedding_UMIs,
    p_embedding_developmental_stage,
    p_embedding_stage
), `+`) +
    plot_layout(ncol = 2) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Cell selection based on clustering and Table S1.

cells_excluded <- embedding %>%
    filter(
        louvain == 3 | (developmental_stage == "Day 6" & louvain == 1) | (developmental_stage %in% c("Day 13", "Day 14"))
    ) %>%
    pull(cell)
cells_included <- embedding$cell[!embedding$cell %in% cells_excluded]

plot_embedding(
    embedding = embedding[, c("x_tsne", "y_tsne")],
    color_values = as.factor(as.integer(!embedding$cell %in% cells_excluded)),
    label = str_c(
        "t-SNE; Cells included: ",
        nrow(embedding) - length(cells_excluded)
    ),
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = c("grey70", "salmon")
    )
## Scale for 'colour' is already present. Adding another scale for 'colour',
## which will replace the existing scale.

cell_metadata %>%
    filter(sample_name %in% cells_included) %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(stage) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(stage) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

stage num_cells num_umis num_genes
Blastocyst 32 1174108 9748.0
Implantation 310 1256794 10850.5
Post-Implantation 190 826354 10942.0

cell_metadata %>%
    filter(sample_name %in% cells_included) %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(developmental_stage) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(developmental_stage) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

developmental_stage num_cells num_umis num_genes
Day 6 32 1174108.5 9748.0
Day 7 129 1409906.0 10455.0
Day 8 181 1193876.0 11215.0
Day 9 86 827823.0 11398.0
Day 10 104 805837.5 10452.5

Clustering using lineage markers

Preprocessing

Extract ~300 lineage marker genes from another study for clustering.

table_2_sheet1 <- readxl::read_excel(
    path = "../../../../docs/Single-Cell_RNA-Seq_Reveals_Lineage_and_X_Chromosome_Dynamics_in_Human_Preimplantation_Embryos/1-s2.0-S009286741630280X-mmc3.xlsx",
    sheet = "S2.maintlingenes",
    skip = 5
) %>%
    select(
        gene,
        lineage,
        `Fig4DE.genecluster`
    )
## New names:
## * EPI -> EPI...7
## * PE -> PE...8
## * TE -> TE...9
## * EPI -> EPI...10
## * PE -> PE...11
## * ...
table_2_sheet1
FEATURES_SELECTED <- rownames(matrix_readcount_use)[
    gene_symbo_info$X2 %in% table_2_sheet1$gene
]

Filter uninformative genes.

matrix_readcount_norm <- matrix_readcount_use[
    rownames(matrix_readcount_use) %in% FEATURES_SELECTED, cells_included
]
matrix_readcount_norm <- matrix_readcount_norm[
    Matrix::rowSums(
        matrix_readcount_norm
    ) >= MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE,
]
dim(matrix_readcount_norm)
## [1] 288 532

Median normalize matrix.

matrix_readcount_norm@x <- median(colSums(matrix_readcount_norm)) *
    (matrix_readcount_norm@x / rep.int(
        colSums(matrix_readcount_norm),
        diff(matrix_readcount_norm@p)
    ))

Dimensionality reduction

PCA

matrix_readcount_norm_log <- matrix_readcount_norm
matrix_readcount_norm_log@x <- log1p(matrix_readcount_norm_log@x)

# z-score
matrix_readcount_norm_log_scaled <- t(
    scale(
        t(
            matrix_readcount_norm_log
        ),
        center = TRUE, scale = TRUE
    )
)
# features_var <- apply(matrix_readcount_norm_log_scaled, 1, var)
# features_use <- rownames(matrix_readcount_norm_log_scaled)[features_var > 0]

pca_out <- prcomp(
    t(matrix_readcount_norm_log_scaled),
    center = FALSE,
    scale = FALSE
)
summary(pca_out)$imp %>%
    t() %>%
    as.data.frame() %>%
    rownames_to_column(var = "component") %>%
    mutate(
        rank = 1:n()
    ) %>%
    slice(1:30) %>%
    ggplot(
        aes(
            x = rank,
            y = `Proportion of Variance`
        )
    ) +
    geom_point(size = 0.3) +
    theme_bw() +
    scale_x_continuous(
        name = "Component",
        breaks = c(1, seq(5, 30, 5))
    ) +
    scale_y_continuous(
        name = "Proportion of variance",
        labels = scales::percent
    ) +
    theme(
        axis.title = ggplot2::element_text(family = "Arial", size = 6),
        axis.text = ggplot2::element_text(family = "Arial", size = 6),
        panel.grid.minor = ggplot2::element_blank()
    )

embedding_selected <- pca_out$x %>%
    as.data.frame() %>%
    rownames_to_column(var = "sample_name") %>%
    select(sample_name:PC3) %>%
    left_join(cell_metadata, by = "sample_name") %>%
    dplyr::rename(
        x_pca = PC1,
        y_pca = PC2,
        z_pca = PC3
    ) %>%
    relocate(x_pca, y_pca, z_pca, .after = last_col())
p_embedding_pca1 <- embedding_selected %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = y_pca,
        color = developmental_stage,
        shape = stage
    ) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_selected$developmental_stage))
        )
    ) +
    scale_shape_manual(values = seq_along(levels(embedding_selected$stage))) +
    add_xy_label_pca()
p_embedding_pca1 <- embedding_selected %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = y_pca,
        color = developmental_stage,
        shape = stage
    ) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_selected$developmental_stage))
        )
    ) +
    scale_shape_manual(values = seq_along(levels(embedding_selected$stage))) +
    add_xy_label_pca(x = "PC1", y = "PC2") # + customized_theme()

p_embedding_pca2 <- embedding_selected %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = z_pca,
        color = developmental_stage,
        shape = stage
    ) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_selected$developmental_stage))
        )
    ) +
    scale_shape_manual(values = seq_along(levels(embedding_selected$stage))) +
    add_xy_label_pca(x = "PC1", y = "PC3") # + customized_theme()

t-SNE

N_COMPONENTS <- 6

set.seed(seed = SEED_USE)
embedding_rtsne <- Rtsne::Rtsne(
    X = pca_out$x[, 1:N_COMPONENTS],
    perplexity = 30,
    check_duplicates = FALSE,
    pca = FALSE,
    max_iter = 3000,
    verbose = TRUE
)$Y
## Read the 532 x 6 data matrix successfully!
## Using no_dims = 2, perplexity = 30.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.04 seconds (sparsity = 0.217953)!
## Learning embedding...
## Iteration 50: error is 58.528656 (50 iterations in 0.08 seconds)
## Iteration 100: error is 56.058021 (50 iterations in 0.05 seconds)
## Iteration 150: error is 56.046939 (50 iterations in 0.05 seconds)
## Iteration 200: error is 56.066100 (50 iterations in 0.05 seconds)
## Iteration 250: error is 56.087118 (50 iterations in 0.05 seconds)
## Iteration 300: error is 0.638294 (50 iterations in 0.05 seconds)
## Iteration 350: error is 0.581797 (50 iterations in 0.05 seconds)
## Iteration 400: error is 0.567847 (50 iterations in 0.05 seconds)
## Iteration 450: error is 0.561359 (50 iterations in 0.05 seconds)
## Iteration 500: error is 0.557746 (50 iterations in 0.05 seconds)
## Iteration 550: error is 0.554465 (50 iterations in 0.05 seconds)
## Iteration 600: error is 0.551538 (50 iterations in 0.05 seconds)
## Iteration 650: error is 0.548524 (50 iterations in 0.05 seconds)
## Iteration 700: error is 0.546396 (50 iterations in 0.05 seconds)
## Iteration 750: error is 0.544097 (50 iterations in 0.05 seconds)
## Iteration 800: error is 0.542233 (50 iterations in 0.05 seconds)
## Iteration 850: error is 0.540529 (50 iterations in 0.05 seconds)
## Iteration 900: error is 0.540004 (50 iterations in 0.05 seconds)
## Iteration 950: error is 0.540042 (50 iterations in 0.05 seconds)
## Iteration 1000: error is 0.540449 (50 iterations in 0.05 seconds)
## Iteration 1050: error is 0.540456 (50 iterations in 0.05 seconds)
## Iteration 1100: error is 0.540428 (50 iterations in 0.05 seconds)
## Iteration 1150: error is 0.539579 (50 iterations in 0.05 seconds)
## Iteration 1200: error is 0.539804 (50 iterations in 0.05 seconds)
## Iteration 1250: error is 0.539878 (50 iterations in 0.05 seconds)
## Iteration 1300: error is 0.539222 (50 iterations in 0.05 seconds)
## Iteration 1350: error is 0.538884 (50 iterations in 0.05 seconds)
## Iteration 1400: error is 0.539178 (50 iterations in 0.05 seconds)
## Iteration 1450: error is 0.539663 (50 iterations in 0.05 seconds)
## Iteration 1500: error is 0.539604 (50 iterations in 0.05 seconds)
## Iteration 1550: error is 0.539507 (50 iterations in 0.05 seconds)
## Iteration 1600: error is 0.539032 (50 iterations in 0.05 seconds)
## Iteration 1650: error is 0.538871 (50 iterations in 0.05 seconds)
## Iteration 1700: error is 0.538310 (50 iterations in 0.05 seconds)
## Iteration 1750: error is 0.536673 (50 iterations in 0.05 seconds)
## Iteration 1800: error is 0.536039 (50 iterations in 0.05 seconds)
## Iteration 1850: error is 0.535525 (50 iterations in 0.05 seconds)
## Iteration 1900: error is 0.535011 (50 iterations in 0.05 seconds)
## Iteration 1950: error is 0.535310 (50 iterations in 0.05 seconds)
## Iteration 2000: error is 0.535797 (50 iterations in 0.05 seconds)
## Iteration 2050: error is 0.535280 (50 iterations in 0.05 seconds)
## Iteration 2100: error is 0.535174 (50 iterations in 0.05 seconds)
## Iteration 2150: error is 0.534431 (50 iterations in 0.05 seconds)
## Iteration 2200: error is 0.534219 (50 iterations in 0.05 seconds)
## Iteration 2250: error is 0.533772 (50 iterations in 0.05 seconds)
## Iteration 2300: error is 0.533192 (50 iterations in 0.05 seconds)
## Iteration 2350: error is 0.533281 (50 iterations in 0.05 seconds)
## Iteration 2400: error is 0.532691 (50 iterations in 0.05 seconds)
## Iteration 2450: error is 0.532825 (50 iterations in 0.05 seconds)
## Iteration 2500: error is 0.533034 (50 iterations in 0.05 seconds)
## Iteration 2550: error is 0.532718 (50 iterations in 0.05 seconds)
## Iteration 2600: error is 0.532611 (50 iterations in 0.05 seconds)
## Iteration 2650: error is 0.532282 (50 iterations in 0.05 seconds)
## Iteration 2700: error is 0.532384 (50 iterations in 0.05 seconds)
## Iteration 2750: error is 0.532231 (50 iterations in 0.05 seconds)
## Iteration 2800: error is 0.532761 (50 iterations in 0.05 seconds)
## Iteration 2850: error is 0.532607 (50 iterations in 0.05 seconds)
## Iteration 2900: error is 0.532605 (50 iterations in 0.05 seconds)
## Iteration 2950: error is 0.532421 (50 iterations in 0.05 seconds)
## Iteration 3000: error is 0.532460 (50 iterations in 0.05 seconds)
## Fitting performed in 3.01 seconds.

embedding_selected <- cbind(
    embedding_selected,
    embedding_rtsne
) %>%
    dplyr::rename(
        x_tsne = "1",
        y_tsne = "2"
    )
p_embedding_tsne <- embedding_selected %>%
    mutate(
        category = "t-SNE"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_tsne,
        y = y_tsne,
        color = developmental_stage,
        shape = stage
    ) +
    labs(x = "Dim 1", y = "Dim 2") +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_selected$developmental_stage))
        )
    ) +
    scale_shape_manual(values = seq_along(levels(embedding_selected$stage))) # +
# add_xy_label_pca(show = FALSE) +
# customized_theme()


UMAP

N_COMPONENTS <- 6

set.seed(seed = SEED_USE)
embedding_umap <- uwot::umap(
    X = pca_out$x[, 1:N_COMPONENTS],
    n_neighbors = 15,
    n_components = 2,
    metric = "cosine",
    spread = 1,
    min_dist = 0.01,
    verbose = TRUE
)
## 18:38:23 UMAP embedding parameters a = 1.896 b = 0.8006
## 18:38:23 Read 532 rows and found 6 numeric columns
## 18:38:23 Using Annoy for neighbor search, n_neighbors = 15
## 18:38:23 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:38:24 Writing NN index file to temp file /var/folders/h1/78b2tkd552ngjl6_ps_5sw7r0000gn/T//RtmpLJClXw/file4e711f701a1a
## 18:38:24 Searching Annoy index using 4 threads, search_k = 1500
## 18:38:24 Annoy recall = 100%
## 18:38:24 Commencing smooth kNN distance calibration using 4 threads
## 18:38:24 Initializing from normalized Laplacian + noise
## 18:38:24 Commencing optimization for 500 epochs, with 9546 positive edges
## 18:38:25 Optimization finished

embedding_selected <- cbind(
    embedding_selected,
    embedding_umap
) %>%
    dplyr::rename(
        x_umap = "1",
        y_umap = "2"
    )
p_embedding_umap <- embedding_selected %>%
    mutate(
        category = "UMAP"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_umap,
        y = y_umap,
        color = developmental_stage,
        shape = stage
    ) +
    labs(x = "Dim 1", y = "Dim 2") +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_selected$developmental_stage))
        )
    ) +
    scale_shape_manual(values = seq_along(levels(embedding_selected$stage)))
# add_xy_label_pca(show = FALSE) +  customized_theme()


Fig. 2AB

purrr::reduce(list(
    p_embedding_pca1,
    p_embedding_pca2,
    p_embedding_tsne,
    p_embedding_umap
), `+`) +
    plot_layout(ncol = 2, guides = "collect") +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

SELECTED_FEATURE <- "psg1"
grep(SELECTED_FEATURE, rownames(matrix_cpm_use), ignore.case = TRUE, value = TRUE)
## [1] "ENSG00000116176_TPSG1" "ENSG00000231924_PSG1"  "ENSG00000243130_PSG11"
SELECTED_FEATURE <- "ENSG00000203909_DPPA5"
SELECTED_FEATURE <- "ENSG00000275410_HNF1B"

FEATURES_SELECTED <- c(
    "ENSG00000134853_PDGFRA",
    "ENSG00000275410_HNF1B",
    "ENSG00000125845_BMP2",
    "ENSG00000203909_DPPA5",
    "ENSG00000204531_POU5F1",
    "ENSG00000241186_TDGF1",
    "ENSG00000179348_GATA2",
    "ENSG00000107485_GATA3"
)

map(FEATURES_SELECTED, function(x) {
    plot_embedding_value(
        embedding = embedding_selected[, c("x_umap", "y_umap")],
        color_values = matrix_cpm_use[x, embedding_selected$sample_name],
        colorbar_position = c(0.85, 0.25),
        label = str_c("t-SNE; ", x),
        label_position = NULL,
        # label_position = c(label_x, label_y),
        geom_point_size = 1,
        sort_values = TRUE,
        FUN = NULL
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2, guides = "keep") +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Profiling peri-implantation development

cells_PE <- c(
    "GSM3578305",
    "GSM3578308",
    "GSM3578339",
    "GSM3578505",
    "GSM3578651",
    "GSM3578653",
    "GSM3578682",
    "GSM3578696",
    "GSM3578697",
    "GSM3578707",
    "GSM3578729"
)

cells_EPI <- c(
    "GSM3578300",
    "GSM3578302",
    "GSM3578365",
    "GSM3578492",
    "GSM3578515",
    "GSM3578666",
    "GSM3578668",
    "GSM3578714",
    "GSM3578716",
    "GSM3578728"
)

embedding_selected %<>%
    mutate(
        lineage = case_when(
            sample_name %in% cells_PE ~ "PE",
            sample_name %in% cells_EPI ~ "EPI",
            TRUE ~ "TE"
        ),
        lineage = factor(
            lineage,
            levels = c("EPI", "PE", "TE")
        )
    )


embedding_selected %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(lineage) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(lineage) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

lineage num_cells num_umis num_genes
EPI 10 1130471 11465.5
PE 11 1123643 10314.0
TE 511 1150046 10807.0

p_embedding_pca1_lineage <- plot_embedding(
    embedding = embedding_selected[, c("x_pca", "y_pca")],
    color_values = embedding_selected$lineage,
    label = "PCA; Lineage; PCA1, PCA2",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme()

p_embedding_pca2_lineage <- plot_embedding(
    embedding = embedding_selected[, c("x_pca", "z_pca")],
    color_values = embedding_selected$lineage,
    label = "PCA; Lineage; PCA1, PCA3",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme(y = 0.25)

p_embedding_tsne_lineage <- plot_embedding(
    embedding = embedding_selected[, c("x_tsne", "y_tsne")],
    color_values = embedding_selected$lineage,
    label = "t-SNE; Lineage",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme(y = 0.25)

p_embedding_umap_lineage <- plot_embedding(
    embedding = embedding_selected[, c("x_umap", "y_umap")],
    color_values = embedding_selected$lineage,
    label = "UMAP; Lineage",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme(y = 0.25)


Fig. 2C

purrr::reduce(list(
    p_embedding_pca1_lineage,
    p_embedding_pca2_lineage,
    p_embedding_tsne_lineage,
    p_embedding_umap_lineage
), `+`) +
    plot_layout(ncol = 2) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

TE development

Visualization of TE clustering

embedding_TE <- read_csv(
    file = "../reclustering_TE/exploring/embedding_ncomponents7_seed20200317.csv"
) %>%
    select(
        cell:y_tsne, x_phate, y_phate
    ) %>%
    left_join(
        embedding %>% select(cell, developmental_stage:stage)
    )
## Parsed with column specification:
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
## Joining, by = "cell"
embedding_TE
p_embedding_TE_cluster <- plot_embedding(
    embedding = embedding_TE[, c("x_tsne", "y_tsne")],
    color_values = embedding_TE$louvain %>% as.factor(),
    label = "t-SNE; Cluster",
    # label_position = c(label_x, label_y),
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_TE$louvain)))
    )

p_embedding_TE_cluster_phate <- plot_embedding(
    embedding = embedding_TE[, c("x_phate", "y_phate")],
    color_values = embedding_TE$louvain %>% as.factor(),
    label = "PHATE; Cluster",
    # label_position = c(label_x, label_y),
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_TE$louvain)))
    )

p_embedding_TE_developmental_stage <- plot_embedding(
    embedding = embedding_TE[, c("x_tsne", "y_tsne")],
    color_values = embedding_TE$developmental_stage,
    label = "t-SNE; Developmental stage",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(levels(embedding_TE$developmental_stage))
        )
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme(y = 0.35)

p_embedding_TE_developmental_stage_phate <- plot_embedding(
    embedding = embedding_TE[, c("x_phate", "y_phate")],
    color_values = embedding_TE$developmental_stage,
    label = "PHATE; Developmental stage",
    # label_position = c(label_x, label_y),
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 1,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(levels(embedding_TE$developmental_stage))
        )
    ) +
    labs(color = NULL) +
    guides(colour = guide_legend(override.aes = list(size = 3))) +
    customized_theme(x = 0.7, y = 0.99)
embedding_TE %>%
    mutate(
        num_umis = matrix_readcount_use[, .$cell] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$cell] > 0) %>% colSums()
    ) %>%
    group_by(louvain) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(louvain) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

louvain num_cells num_umis num_genes
0 104 1255710 11575.5
1 97 762959 11175.0
2 94 1176730 10511.5
3 91 1219712 10492.0
4 91 1312327 9973.0
5 55 1090194 10876.0

embedding_TE %>%
    mutate(
        num_umis = matrix_readcount_use[, .$cell] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$cell] > 0) %>% colSums()
    ) %>%
    group_by(developmental_stage) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(developmental_stage) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` ungrouping output (override with `.groups` argument)

developmental_stage num_cells num_umis num_genes
Day 6 32 1174108.5 9748.0
Day 7 129 1409906.0 10455.0
Day 8 181 1193876.0 11215.0
Day 9 86 827823.0 11398.0
Day 10 104 805837.5 10452.5

Fig. 4AB

purrr::reduce(list(
    p_embedding_TE_cluster,
    p_embedding_TE_cluster_phate,
    p_embedding_TE_developmental_stage,
    p_embedding_TE_developmental_stage_phate
), `+`) +
    plot_layout(ncol = 2, byrow = TRUE) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Clustering distribution


Fig. S3A

prepare_cluster_composition(
    embedding = droplevels(embedding_TE),
    x = developmental_stage, group = louvain
) %>%
    mutate(louvain = factor(
        louvain,
        levels = sort(unique(embedding_TE$louvain))
    )) %>%
    arrange(louvain) %>%
    plot_barplot(x = developmental_stage, y = percentage, z = louvain) +
    guides(fill = guide_legend(override.aes = list(size = 3))) +
    scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding_TE$louvain)))
    )
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)

Marker expression

Barplot

# SELECTED_FEATURE <- "FZD5"
# grep(SELECTED_FEATURE, rownames(matrix_cpm_use), ignore.case = TRUE, value = TRUE)

FEATURES_SELECTED <- c(
    "ENSG00000161638_ITGA5",
    "ENSG00000204632_HLA-G",
    "ENSG00000115414_FN1",
    "ENSG00000087245_MMP2",
    "ENSG00000010278_CD9",
    "ENSG00000213949_ITGA1",
    #
    "ENSG00000189052_CGB5",
    "ENSG00000231924_PSG1",
    "ENSG00000203857_HSD3B1",
    "ENSG00000137869_CYP19A1",
    "ENSG00000115884_SDC1",
    "ENSG00000123999_INHA",
    "ENSG00000242950_ERVW-1",
    "ENSG00000269526_ERVV-1",
    "ENSG00000135346_CGA",
    #
    "ENSG00000091409_ITGA6",
    "ENSG00000073282_TP63",
    "ENSG00000168036_CTNNB1",
    "ENSG00000162337_LRP5",
    "ENSG00000197905_TEAD4",
    "ENSG00000135374_ELF5",
    "ENSG00000066468_FGFR2",
    "ENSG00000163251_FZD5"
)
map(FEATURES_SELECTED, function(x) {
    embedding_TE %>%
        mutate(feature_value = matrix_cpm_use[x, cell]) %>%
        group_by(developmental_stage, louvain) %>%
        summarise(mean_expr = mean(feature_value)) %>%
        plot_barplot_feature(
            x = developmental_stage,
            y = mean_expr,
            z = "louvain",
            title = x
        ) +
        scale_fill_manual(
            values = ggthemes::tableau_color_pal("Tableau 20")(
                length(levels(embedding_TE$developmental_stage))
            )
        ) +
        guides(fill = "none")
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 1) +
    plot_annotation(
        tag_levels = c("A"),
        theme = theme(plot.margin = margin())
    ) &
    theme(
        plot.tag.position = c(0, 1),
        plot.tag = element_text(
            family = "Arial",
            size = 7,
            hjust = 0,
            vjust = 0
        )
    )
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)

embedding_TE %<>%
    mutate(
        lineage_te = case_when(
            louvain %in% c(1) ~ "EVT",
            louvain %in% c(2) ~ "ST",
            louvain %in% c(5) ~ "CT",
            TRUE ~ "Undiff"
        ),
        lineage_te = factor(
            lineage_te,
            levels = c("EVT", "ST", "CT", "Undiff")
        )
    )
embedding_TE <- droplevels(embedding_TE)


Fig. 4E

prepare_cluster_composition(
    embedding = droplevels(embedding_TE),
    x = developmental_stage, group = lineage_te
) %>%
    arrange(lineage_te) %>%
    plot_barplot(x = developmental_stage, y = percentage, z = lineage_te) +
    guides(fill = guide_legend(override.aes = list(size = 3))) +
    scale_fill_manual(
        values =

            RColorBrewer::brewer.pal(
                n = nlevels(embedding_TE$lineage_te),
                name = "Dark2"
            )
    )
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'developmental_stage' (override with `.groups` argument)

Heatmap

matrix_heatmap <- calc_cpm(m = matrix_readcount_use)[FEATURES_SELECTED, ]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]
matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))
# rownames(matrix_heatmap) <- str_remove(
#     string = rownames(matrix_heatmap),
#     pattern = "^E.+_"
# )

(heatmap_limits <- quantile(matrix_heatmap, c(0.05, 0.95)))
##        5%       95% 
## -1.683433  1.537303
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]
# order cells
embedding_TE$lineage_te %>% table()
## .
##    EVT     ST     CT Undiff 
##     97     94     55    286
cells_heatmap <- map(c("EVT", "ST", "CT"), function(x) {
    cells_in_group <- embedding_TE %>%
        filter(lineage_te == x) %>%
        pull(cell)

    cells_in_group[
        pvclust::pvclust(
            matrix_heatmap[, cells_in_group],
            nboot = 100,
            parallel = parallel::detectCores() - 2L
        )$hclust$order
    ]
}) %>%
    unlist()
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
## Creating a temporary cluster...done:
## socket cluster with 6 nodes on host 'localhost'
## Multiscale bootstrap... Done.
# order features, index
if (FALSE) {
    features_heatmap <- pvclust::pvclust(
        t(matrix_heatmap[, cells_heatmap]),
        nboot = 100,
        parallel = parallel::detectCores() - 2L
    )$hclust$order
}

features_heatmap <- c(
    "ENSG00000115414_FN1",
    "ENSG00000204632_HLA-G",
    "ENSG00000087245_MMP2",
    "ENSG00000161638_ITGA5",
    "ENSG00000010278_CD9",
    "ENSG00000213949_ITGA1",
    #
    "ENSG00000231924_PSG1",
    "ENSG00000203857_HSD3B1",
    "ENSG00000137869_CYP19A1",
    "ENSG00000115884_SDC1",
    "ENSG00000123999_INHA",
    "ENSG00000242950_ERVW-1",
    "ENSG00000269526_ERVV-1",
    "ENSG00000135346_CGA",
    "ENSG00000189052_CGB5",
    #
    "ENSG00000168036_CTNNB1",
    "ENSG00000091409_ITGA6",
    "ENSG00000162337_LRP5",
    "ENSG00000073282_TP63",
    "ENSG00000197905_TEAD4",
    "ENSG00000135374_ELF5",
    "ENSG00000066468_FGFR2",
    "ENSG00000163251_FZD5"
)
matrix_heatmap <- matrix_heatmap[features_heatmap, cells_heatmap]
ha_columns <- ComplexHeatmap::HeatmapAnnotation(
    lineage = ComplexHeatmap::anno_simple(
        embedding_TE[match(
            x = colnames(matrix_heatmap),
            table = embedding_TE$cell
        ), "lineage_te"],
        # pch = anno_labels_cluster,
        col = setNames(
            object = RColorBrewer::brewer.pal(
                n = nlevels(embedding_TE$lineage_te),
                name = "Dark2"
            ),
            nm = levels(embedding_TE$lineage_te)
        ),
        which = "column",
        pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(3, "mm")
    ),
    #
    developmental_stage = ComplexHeatmap::anno_simple(
        embedding_TE[match(
            x = colnames(matrix_heatmap),
            table = embedding_TE$cell
        ), "developmental_stage"],
        # pch = anno_labels_lineage,
        col = setNames(
            object = ggthemes::tableau_color_pal("Tableau 20")(
                embedding_TE$developmental_stage %>%
                    droplevels() %>%
                    nlevels()
            ),
            nm = embedding_TE$developmental_stage %>%
                droplevels() %>%
                levels()
        ),
        which = "column",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(3, "mm")
    ),
    show_annotation_name = TRUE,
    annotation_label = c(
        "Lineage",
        "Developmental stage"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)
ha_left <- ComplexHeatmap::HeatmapAnnotation(
    lineage = ComplexHeatmap::anno_simple(
        features_heatmap,
        # pch = anno_labels_lineage,
        col = setNames(
            object = c(
                "#1B9E77",
                "#1B9E77",
                "#1B9E77",
                "#1B9E77",
                "#1B9E77",
                "#1B9E77",
                # ,
                "#D95F02",
                "#D95F02",
                "#D95F02",
                "#D95F02",
                "#D95F02",
                "#D95F02",
                "#D95F02",
                "#D95F02",
                "#D95F02",
                # ,
                "#7570B3",
                "#7570B3",
                "#7570B3",
                "#7570B3",
                "#7570B3",
                "#7570B3",
                "#7570B3",
                "#7570B3"
            ),
            nm = features_heatmap
        ),
        which = "row",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(3, "mm")
    ),
    which = "row",
    show_annotation_name = FALSE,
    annotation_label = c(
        "Lineage"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)
lgd_lineage <- ComplexHeatmap::Legend(
    title = "Lineage",
    labels = levels(embedding_TE$lineage_te)[-nlevels(embedding_TE$lineage_te)],
    legend_gp = grid::gpar(
        fill = RColorBrewer::brewer.pal(
            n = nlevels(embedding_TE$lineage_te) - 1,
            name = "Dark2"
        )
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    ),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    )
)

lgd_developmental_stage <- ComplexHeatmap::Legend(
    title = "Develpmental stage",
    labels = levels(embedding_TE$developmental_stage),
    legend_gp = grid::gpar(fill = ggthemes::tableau_color_pal("Tableau 20")(
        nlevels(embedding_TE$developmental_stage)
    )),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    ),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    )
)
ht <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap,
    # col =  viridis::plasma(n = 10),
    col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    #
    cluster_rows = FALSE,
    cluster_columns = FALSE,
    #
    row_labels = str_remove(string = rownames(matrix_heatmap), pattern = "^E.+_"),
    row_names_side = c("left"),
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    show_row_names = TRUE,
    show_column_names = FALSE,
    #
    show_row_dend = FALSE,
    show_column_dend = FALSE,
    #
    show_heatmap_legend = TRUE,
    top_annotation = ha_columns,
    left_annotation = ha_left,
    #
    heatmap_legend_param = list(
        title = "Z score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_height = unit(25, "mm"),
        legend_width = unit(10, "mm")
    ),
    #
    use_raster = TRUE
)
pd <- ComplexHeatmap::packLegend(
    lgd_lineage,
    lgd_developmental_stage,
    # gap = unit(8, "mm"),
    direction = "vertical"
)


Fig. 4C

ComplexHeatmap::draw(
    ht,
    heatmap_legend_list = list(pd)
)

R session info

devtools::session_info()$platform
##  setting  value                       
##  version  R version 4.0.2 (2020-06-22)
##  os       macOS Catalina 10.15.6      
##  system   x86_64, darwin19.5.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-08-12
devtools::session_info()$pack %>%
    as_tibble() %>%
    select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    # print(n = nrow(.))
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
package loadedversion date source
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.0)
backports 1.1.8 2020-06-17 CRAN (R 4.0.1)
BayesFactor 0.9.12-4.2 2018-05-19 CRAN (R 4.0.2)
blob 1.2.1 2020-01-20 CRAN (R 4.0.0)
broom 0.7.0.9001 2020-08-11 Github (tidymodels/broom@8992c71)
callr 3.4.3.9000 2020-07-31 Github (r-lib/callr@b96da8f)
cellranger 1.1.0 2016-07-27 CRAN (R 4.0.0)
checkmate 2.0.0 2020-02-06 CRAN (R 4.0.0)
circlize 0.4.10 2020-06-15 CRAN (R 4.0.1)
cli 2.0.2 2020-02-28 CRAN (R 4.0.0)
clue 0.3-57 2019-02-25 CRAN (R 4.0.0)
cluster 2.1.0 2019-06-19 CRAN (R 4.0.2)
coda 0.19-3 2019-07-05 CRAN (R 4.0.0)
codetools 0.2-16 2018-12-24 CRAN (R 4.0.2)
colorspace 1.4-1 2019-03-18 CRAN (R 4.0.0)
ComplexHeatmap 2.4.3 2020-07-25 Bioconductor
crayon 1.3.4 2017-09-16 CRAN (R 4.0.0)
DBI 1.1.0 2019-12-15 CRAN (R 4.0.0)
dbplyr 1.4.4.9000 2020-07-28 Github (tidyverse/dbplyr@a6ed629)
desc 1.2.0 2018-05-01 CRAN (R 4.0.0)
devtools 2.3.1.9000 2020-07-31 Github (r-lib/devtools@df619ce)
digest 0.6.25 2020-02-23 CRAN (R 4.0.0)
dplyr 1.0.2 2020-08-12 Github (tidyverse/dplyr@0bea3e8)
ellipsis 0.3.1.9000 2020-07-18 Github (r-lib/ellipsis@57a5071)
evaluate 0.14 2019-05-28 CRAN (R 4.0.0)
extrafont 0.17 2014-12-08 CRAN (R 4.0.2)
extrafontdb 1.0 2012-06-11 CRAN (R 4.0.0)
fansi 0.4.1 2020-01-08 CRAN (R 4.0.0)
farver 2.0.3 2020-01-16 CRAN (R 4.0.0)
forcats 0.5.0.9000 2020-05-28 Github (tidyverse/forcats@ab81d1b)
fs 1.5.0.9000 2020-08-03 Github (r-lib/fs@93e70a9)
generics 0.0.2 2018-11-29 CRAN (R 4.0.0)
GetoptLong 1.0.2 2020-07-06 CRAN (R 4.0.2)
ggplot2 3.3.2.9000 2020-08-04 Github (tidyverse/ggplot2@6d91349)
ggrepel 0.9.0 2020-07-24 Github (slowkow/ggrepel@4d0ef50)
ggthemes 4.2.0 2019-05-13 CRAN (R 4.0.0)
GlobalOptions 0.1.2 2020-06-10 CRAN (R 4.0.0)
glue 1.4.1.9000 2020-07-07 Github (tidyverse/glue@205f18b)
gt 0.2.2 2020-08-06 Github (rstudio/gt@c97cb4c)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.0)
gtools 3.8.2 2020-03-31 CRAN (R 4.0.0)
haven 2.3.1 2020-06-01 CRAN (R 4.0.0)
hms 0.5.3 2020-01-08 CRAN (R 4.0.0)
htmltools 0.5.0 2020-06-16 CRAN (R 4.0.1)
httr 1.4.2 2020-07-20 CRAN (R 4.0.2)
jpeg 0.1-8.1 2019-10-24 CRAN (R 4.0.0)
jsonlite 1.7.0 2020-06-25 CRAN (R 4.0.2)
knitr 1.29 2020-06-23 CRAN (R 4.0.2)
labeling 0.3 2014-08-23 CRAN (R 4.0.0)
lattice 0.20-41 2020-04-02 CRAN (R 4.0.2)
lifecycle 0.2.0 2020-03-06 CRAN (R 4.0.0)
lubridate 1.7.9 2020-07-11 Github (tidyverse/lubridate@de2ee09)
magrittr 1.5.0.9000 2020-08-04 Github (tidyverse/magrittr@ba52096)
Matrix 1.2-18 2019-11-27 CRAN (R 4.0.2)
MatrixModels 0.4-1 2015-08-22 CRAN (R 4.0.0)
memoise 1.1.0 2017-04-21 CRAN (R 4.0.0)
modelr 0.1.8.9000 2020-05-19 Github (tidyverse/modelr@16168e0)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.0)
mvtnorm 1.1-1 2020-06-09 CRAN (R 4.0.2)
patchwork 1.0.1.9000 2020-06-22 Github (thomasp85/patchwork@82a5e03)
pbapply 1.4-2 2019-08-31 CRAN (R 4.0.0)
pillar 1.4.6.9000 2020-07-21 Github (r-lib/pillar@8aef8f2)
pkgbuild 1.1.0.9000 2020-07-14 Github (r-lib/pkgbuild@3a87bd9)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.0)
pkgload 1.1.0 2020-05-29 CRAN (R 4.0.0)
png 0.1-7 2013-12-03 CRAN (R 4.0.0)
prettyunits 1.1.1.9000 2020-08-10 Github (r-lib/prettyunits@b1cdad8)
processx 3.4.3 2020-07-05 CRAN (R 4.0.2)
ps 1.3.4 2020-08-11 CRAN (R 4.0.2)
purrr 0.3.4.9000 2020-08-10 Github (tidyverse/purrr@5de5ad2)
pvclust 2.2-0 2020-04-25 Github (shimo-lab/pvclust@e734d05)
R6 2.4.1.9000 2020-07-18 Github (r-lib/R6@1415d11)
RColorBrewer 1.1-2 2014-12-07 CRAN (R 4.0.0)
Rcpp 1.0.5 2020-07-06 CRAN (R 4.0.2)
RcppAnnoy 0.0.16 2020-03-08 CRAN (R 4.0.0)
readr 1.3.1.9000 2020-07-16 Github (tidyverse/readr@2ab51b2)
readxl 1.3.1.9000 2020-05-28 Github (tidyverse/readxl@3815961)
remotes 2.2.0.9000 2020-08-06 Github (r-lib/remotes@5a546ad)
reprex 0.3.0 2019-05-16 CRAN (R 4.0.0)
reticulate 1.16 2020-05-27 CRAN (R 4.0.2)
rjson 0.2.20 2018-06-08 CRAN (R 4.0.0)
rlang 0.4.7.9000 2020-08-12 Github (r-lib/rlang@de0c176)
rmarkdown 2.3.3 2020-07-25 Github (rstudio/rmarkdown@204aa41)
rprojroot 1.3-2 2018-01-03 CRAN (R 4.0.0)
RSpectra 0.16-0 2019-12-01 CRAN (R 4.0.2)
rstudioapi 0.11.0-9000 2020-07-31 Github (rstudio/rstudioapi@aa17630)
Rtsne 0.16 2020-07-03 Github (jkrijthe/Rtsne@14b195f)
Rttf2pt1 1.3.8 2020-01-10 CRAN (R 4.0.0)
rvest 0.3.6 2020-07-25 CRAN (R 4.0.2)
sass 0.2.0 2020-03-18 CRAN (R 4.0.2)
scales 1.1.1.9000 2020-07-24 Github (r-lib/scales@9ff4757)
sessioninfo 1.1.1.9000 2020-07-18 Github (r-lib/sessioninfo@791705b)
shape 1.4.4 2018-02-07 CRAN (R 4.0.0)
stringi 1.4.6 2020-02-17 CRAN (R 4.0.0)
stringr 1.4.0.9000 2020-06-01 Github (tidyverse/stringr@f70c4ba)
styler 1.3.2.9000 2020-07-25 Github (r-lib/styler@16d815e)
testthat 2.99.0.9000 2020-08-12 Github (r-lib/testthat@9e643d8)
tibble 3.0.3.9000 2020-07-21 Github (tidyverse/tibble@b4eec19)
tidyr 1.1.1.9000 2020-07-31 Github (tidyverse/tidyr@61e9209)
tidyselect 1.1.0.9000 2020-07-11 Github (tidyverse/tidyselect@69fdc96)
tidyverse 1.3.0.9000 2020-06-01 Github (hadley/tidyverse@8a0bb99)
usethis 1.6.1.9001 2020-08-06 Github (r-lib/usethis@51fcdb8)
uwot 0.1.8.9000 2020-08-03 Github (jlmelville/uwot@db9e397)
vctrs 0.3.2.9000 2020-08-12 Github (r-lib/vctrs@ff4984f)
viridisLite 0.3.0 2018-02-01 CRAN (R 4.0.0)
wesanderson 0.3.6.9000 2020-06-05 Github (karthik/wesanderson@d90700a)
withr 2.2.0 2020-04-20 CRAN (R 4.0.0)
xfun 0.16 2020-07-24 CRAN (R 4.0.2)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.0)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.0)
yarrr 0.1.6 2020-07-23 Github (ndphillips/yarrr@e2e4488)