Zhou, Y., Song, W.M., Andhey, P.S., Swain, A., Levy, T., Miller, K.R., Poliani, P.L., Cominelli, M., Grover, S., Gilfillan, S., et al. (2020). Human and mouse single-nucleus transcriptomics reveal TREM2-dependent and TREM2-independent cellular responses in Alzheimer’s disease. Nat. Med. 26, 131–142.

  • BioProject Accession: PRJNA590042
  • GEO Accession: GSE140510



Load required packages.

library(tidyverse)
library(magrittr)
library(Matrix)
library(extrafont)
library(ggrepel)
library(patchwork)
# library(tidylog)
Sys.time()
## [1] "2020-12-17 19:25:41 CST"

Data preparation

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

load_matrix <- function(x) {
    matrix_readcount_use <- scipy.sparse$load_npz(
        file.path(x, "matrix_readcount.npz")
    )

    colnames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_barcodes.npy")
    )

    rownames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_features.npy")
    )

    return(matrix_readcount_use)
}

Data loading

PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Collaborations/Microglia/raw/public/PRJNA590042/remapped"

Matrix

np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)

matrix_dir <- list(
    "matrices"
)

matrix_readcount_use <- purrr::map(matrix_dir, function(x) {
    load_matrix(file.path(PROJECT_DIR, x))
}) %>%
    purrr::reduce(cbind)

walk(list(matrix_readcount_use, matrix_dir), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 3.4 GB
## 176 B
# clean up
rm(matrix_dir)

Embedding

EMBEDDING_FILE <- "embedding_ncomponents35_ccc1_seed20200416.csv.gz"

embedding <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "clustering",
        "exploring",
        EMBEDDING_FILE
    )
)

# clean up
rm(EMBEDDING_FILE)

Metadata

cell_metadata_PRJNA590042 <- read_delim(
    file.path(
        PROJECT_DIR,
        "raw/",
        "SraRunTable.csv"
    ),
    delim = ","
) %>%
    select(
        run = Run,
        age = Age,
        sample_name = `Sample Name`,
        mouse_genotype,
        source_name
    )

embedding %<>%
    left_join(
        cell_metadata_PRJNA590042 %>%
            select(sample_name, genotype = mouse_genotype),
        by = c("batch" = "sample_name")
    ) %>%
    mutate(
        genotype = factor(
            genotype,
            levels = c("wt", "5XFAD", "Trem2-/-", "Trem2-/- 5XFAD")
        ) %>% fct_recode(WT = "wt")
    )

cell_metadata_PRJNA590042

Distinguishing major brain-cell types

Summary

embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(louvain) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` has ungrouped output. You can override using the `.groups` argument.
louvain num_cells median_umis median_genes
0 8953 2509.0 1439.0
1 7856 14239.0 4261.0
2 6987 13770.0 4518.0
3 6372 11030.0 3962.5
4 6252 13780.0 4370.0
5 5856 701.5 547.0
6 5070 18558.5 5025.0
7 5029 16350.0 4841.0
8 4479 3236.0 1718.0
9 3972 731.0 538.0
10 3906 4403.0 2246.0
11 3696 2704.0 1721.0
12 2964 11285.0 4214.5
13 2484 12135.5 4364.5
14 2316 8534.5 3514.5
15 2215 5280.0 2586.0
16 1752 23367.5 5638.0
17 1740 10955.5 4105.5
18 1610 6057.5 2650.0
19 996 7048.0 3065.0
20 906 10770.5 3921.5
21 814 5551.0 2565.0
22 682 16883.5 4925.0
23 584 1835.0 1225.5
24 537 884.0 640.0
25 528 2872.0 1742.0
26 474 3503.0 2050.5
27 312 4221.5 2078.0
28 238 2310.5 1524.5
29 134 13254.0 4724.0
30 112 5206.0 2697.0
31 47 9194.0 3847.0


embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(genotype) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
## `summarise()` has ungrouped output. You can override using the `.groups` argument.
genotype num_cells median_umis median_genes
WT 24034 7677.0 3313.5
5XFAD 22069 5824.0 2633.0
Trem2-/- 22884 7802.0 3325.0
Trem2-/- 5XFAD 20886 8696.5 3472.5


purrr::map(sort(unique(embedding$louvain)), function(x) {
    cells_in_group <- embedding %>%
        filter(louvain == x) %>%
        pull(cell)

    colSums(matrix_readcount_use[, cells_in_group]) %>%
        enframe(name = "cell") %>%
        mutate(group = x)
}) %>%
    dplyr::bind_rows() %>%
    mutate(
        group = factor(
            group,
            levels = sort(unique(embedding$louvain)) %>% rev()
        ),
        category = "UMI distribution"
    ) %>%
    plot_violin_umi(
        x = value,
        y = group,
        z = "category"
    ) +
    # match cluster colors
    ggplot2::scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    ) +
    ggplot2::scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    )

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "t-SNE"

x_column <- "x_tsne"
y_column <- "y_tsne"


Clustering & UMI & batch & genotype

p_embedding_cluster <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.3,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain)))
    ) +
    # labs(color = NULL) +
    customized_theme()

p_embedding_batch <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.15,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.83, 0.275)
p_embedding_umi <- plot_embedding_value(
    embedding = embedding[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.4,
    sort_values = TRUE,
    FUN = function(x) x
)

p_embedding_genotype <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$genotype,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Genotype"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.15,
    sort_values = FALSE
) +
    scale_color_manual(
        # values = gg_color_hue(n = length(unique(embedding$genotype)))
        values = yarrr::piratepal(palette = "google") %>% as.character()
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()


Fig.1b

list(
    p_embedding_cluster,
    p_embedding_umi,
    p_embedding_genotype,
    p_embedding_batch
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Batch; separated

purrr::map(sort(unique(embedding$batch)), function(x) {
    plot_embedding(
        embedding = embedding[, c(x_column, y_column)],
        color_values = as.numeric(embedding$batch == x) %>% as.factor(),
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = FALSE,
        geom_point_size = 0.18,
        sort_values = TRUE
    ) +
        scale_color_manual(
            values = c("grey70", "salmon")
        )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 3) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})

# clean up
gc()
## 3.4 GB
##             used   (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells   3176585  169.7    6132278   327.5         NA    4144918   221.4
## Vcells 728736863 5559.9 1490893404 11374.7      16384 1464604784 11174.1

Embedding

FEATURES_SELECTED <- c(
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000036887_C1qa"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

grep(pattern = "itm2a", x = rownames(matrix_cpm_use), ignore.case = TRUE, value = TRUE)

FEATURES_SELECTED <- c(
    # Pan-neuronal
    "ENSMUSG00000026959_Grin1",
    "ENSMUSG00000025576_Rbfox3",
    # Excitatory neuron
    "ENSMUSG00000070570_Slc17a7",
    "ENSMUSG00000038331_Satb2",
    # Astrocyte
    "ENSMUSG00000024411_Aqp4",
    "ENSMUSG00000050953_Gja1",
    # Oligodendrocyte
    "ENSMUSG00000076439_Mog",
    "ENSMUSG00000037625_Cldn11",
    # OPC
    "ENSMUSG00000029231_Pdgfra",
    "ENSMUSG00000021614_Vcan",
    # Endothelial cell
    "ENSMUSG00000017344_Vtn",
    "ENSMUSG00000031239_Itm2a"
)

FEATURES_SELECTED_ANNOTATION <- c(
    "Pan-neuronal", "Excitatory neuron", "Astrocyte",
    "Oligodendrocyte", "OPC", "Endothelial cell"
) %>%
    rep(each = 2)
## [1] "ENSMUSG00000031239_Itm2a"


ED Fig.1b

# cell type specific markers
purrr::map2(FEATURES_SELECTED, FEATURES_SELECTED_ANNOTATION, function(x, y) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste(
            paste0(EMBEDDING_TITLE_PREFIX, "; ", x %>% str_remove("^E.+_")),
            y,
            sep = "; "
        ),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))


ED Fig.1b

# Inhibitory neuron
FEATURES_SELECTED <- c(
    "ENSMUSG00000026837_Col5a1",
    "ENSMUSG00000070880_Gad1",
    "ENSMUSG00000061762_Tac1",
    "ENSMUSG00000004366_Sst",
    "ENSMUSG00000041592_Sdk2",
    "ENSMUSG00000026787_Gad2",
    "ENSMUSG00000045573_Penk",
    "ENSMUSG00000029819_Npy"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste(
            EMBEDDING_TITLE_PREFIX,
            x %>% str_remove("^E.+_"),
            "Inhibitory neuron",
            sep = "; "
        ),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

Lollipop

clusters_selected_lollipop <- sort(unique(embedding$louvain))
cells_selected_lollipop <- lapply(clusters_selected_lollipop, function(x) {
    embedding$cell[embedding$louvain == x]
})
names(cells_selected_lollipop) <- clusters_selected_lollipop

FEATURES_LOLLIPOP <- c(
    "ENSMUSG00000026959_Grin1",
    "ENSMUSG00000035864_Syt1",
    "ENSMUSG00000025576_Rbfox3",
    "ENSMUSG00000027273_Snap25",
    "ENSMUSG00000059003_Grin2a",
    "ENSMUSG00000070570_Slc17a7",
    "ENSMUSG00000038331_Satb2",
    "ENSMUSG00000070880_Gad1",
    "ENSMUSG00000026787_Gad2",
    "ENSMUSG00000004366_Sst",
    "ENSMUSG00000029819_Npy",
    "ENSMUSG00000031425_Plp1",
    "ENSMUSG00000041607_Mbp",
    "ENSMUSG00000037625_Cldn11",
    "ENSMUSG00000076439_Mog",
    "ENSMUSG00000024617_Camk2a",
    "ENSMUSG00000032532_Cck",
    "ENSMUSG00000033161_Atp1a1",
    "ENSMUSG00000023868_Pde10a",
    "ENSMUSG00000061762_Tac1",
    "ENSMUSG00000045573_Penk",
    "ENSMUSG00000041592_Sdk2",
    "ENSMUSG00000026837_Col5a1",
    "ENSMUSG00000025582_Nptx1",
    "ENSMUSG00000059173_Pde1a",
    "ENSMUSG00000036617_Etl4",
    "ENSMUSG00000005089_Slc1a2",
    "ENSMUSG00000050953_Gja1",
    "ENSMUSG00000024411_Aqp4",
    "ENSMUSG00000021665_Hexb",
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000036887_C1qa",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000029231_Pdgfra",
    "ENSMUSG00000021614_Vcan",
    "ENSMUSG00000032911_Cspg4",
    "ENSMUSG00000046160_Olig1",
    "ENSMUSG00000017344_Vtn",
    "ENSMUSG00000029648_Flt1",
    "ENSMUSG00000041378_Cldn5"
)

p_dotplot_features_selected <- plot_lollipop(
    cells = cells_selected_lollipop,
    features = FEATURES_LOLLIPOP,
    matrix_cpm = matrix_cpm_use
)
p_dotplot_features_selected

Violin

FEATURES_VIOLIN <- c(
    "ENSMUSG00000005533_Igf1r",
    "ENSMUSG00000022265_Ank",
    "ENSMUSG00000002985_Apoe",
    "ENSMUSG00000029207_Apbb2",
    "ENSMUSG00000021109_Hif1a",
    "ENSMUSG00000007891_Ctsd",
    "ENSMUSG00000023992_Trem2"
)

plot_violin(
    cells = cells_selected_lollipop,
    features = FEATURES_VIOLIN,
    matrix_cpm = matrix_cpm_use,
    x_range_breaks = NULL
) +
    # match cluster colors
    ggplot2::scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    ) +
    ggplot2::scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    )

Cluster composition

p_barplot_cluster_composition_batch <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )


` Fig.1e

p_barplot_cluster_composition_batch

p_barplot_cluster_composition_genotype <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = genotype
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = genotype
    ) +
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character()
    )
p_barplot_cluster_composition_genotype

Subclustering microglia

embedding_microglia <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "clustering",
        "subclustering",
        "exploring",
        "embedding_ncomponents8_ccc1_seed20200416.csv.gz"
    )
) %>%
    left_join(
        cell_metadata_PRJNA590042 %>%
            select(sample_name, genotype = mouse_genotype),
        by = c("batch" = "sample_name")
    ) %>%
    mutate(
        genotype = factor(
            genotype,
            levels = c("wt", "5XFAD", "Trem2-/-", "Trem2-/- 5XFAD")
        ) %>% fct_recode(WT = "wt")
    )
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
matrix_readcount_use <- matrix_readcount_use[, embedding_microglia$cell]
matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_readcount_use, matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
## 95.5 MB
## 95.5 MB

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "t-SNE"

x_column <- "x_tsne"
y_column <- "y_tsne"


Clustering & UMI & batch & genotype

p_embedding_microglia_cluster <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.6,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_microglia$louvain))
        )
    ) +
    # labs(color = NULL) +
    customized_theme()

p_embedding_microglia_batch <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.35,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_microglia$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.83, 0.275)
p_embedding_microglia_umi <- plot_embedding_value(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding_microglia$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)

p_embedding_microglia_genotype <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$genotype,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Genotype"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.4,
    sort_values = FALSE
) +
    scale_color_manual(
        # values = gg_color_hue(n = length(unique(embedding$genotype)))
        values = yarrr::piratepal(palette = "google") %>% as.character()
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()


Fig.2e

list(
    p_embedding_microglia_cluster,
    p_embedding_microglia_umi,
    p_embedding_microglia_genotype,
    p_embedding_microglia_batch
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

Embedding


Fig.2g

FEATURES_SELECTED <- c(
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000036887_C1qa"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding_microglia[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding_microglia$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.6,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

Cluster composition

Batch

p_barplot_cluster_composition_batch <- prepare_cluster_composition(
    embedding = embedding_microglia,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )
## `summarise()` has ungrouped output. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'louvain'. You can override using the `.groups` argument.
# colorspace::scale_fill_discrete_qualitative(palette = "Dark 3")
p_barplot_cluster_composition_batch

Genotype

p_barplot_cluster_composition_genotype <- prepare_cluster_composition(
    embedding = embedding_microglia,
    x = louvain,
    group = genotype
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = genotype
    ) +
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character()
    )
p_barplot_cluster_composition_genotype

Genotype; reversed


Fig.2f

prepare_cluster_composition(
    embedding = embedding_microglia,
    x = genotype,
    group = louvain
) %>%
    arrange(louvain) %>%
    mutate(
        louvain = factor(louvain),
        genotype = fct_rev(genotype)
    ) %>%
    plot_barplot(x = genotype, y = percentage, z = louvain) +
    coord_flip() +
    scale_fill_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_microglia$louvain))
        )
    )

Heatmap

FEATURES_HEATMAP <- c(
    "ENSMUSG00000018126_Baiap2l2",
    "ENSMUSG00000068129_Cst7",
    "ENSMUSG00000014599_Csf1",
    "ENSMUSG00000015568_Lpl",
    # "ENSMUSG00000062593_Lilrb4a",
    "ENSMUSG00000030789_Itgax",
    "ENSMUSG00000005533_Igf1r",
    "ENSMUSG00000000982_Ccl3",
    "ENSMUSG00000050370_Ch25h",
    "ENSMUSG00000022265_Ank",
    "ENSMUSG00000002602_Axl",
    "ENSMUSG00000024610_Cd74",
    "ENSMUSG00000002985_Apoe",
    "ENSMUSG00000069516_Lyz2",
    "ENSMUSG00000029207_Apbb2",
    "ENSMUSG00000021109_Hif1a",
    "ENSMUSG00000018927_Ccl6",
    "ENSMUSG00000029816_Gpnmb",
    "ENSMUSG00000025351_Cd63",
    "ENSMUSG00000073411_H2-D1",
    "ENSMUSG00000007891_Ctsd",
    "ENSMUSG00000023992_Trem2",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000033192_Lpcat2",
    "ENSMUSG00000048163_Selplg",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000054675_Tmem119",
    "ENSMUSG00000079227_Ccr5",
    "ENSMUSG00000029343_Crybb1"
)

matrix_heatmap <- sapply(sort(unique(embedding_microglia$batch)), function(x) {
    cells_in_group <- embedding_microglia %>%
        filter(batch == x) %>%
        pull(cell)

    Matrix::rowMeans(matrix_cpm_use[FEATURES_HEATMAP, cells_in_group])
})

matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))
rownames(matrix_heatmap) <- str_remove(
    string = rownames(matrix_heatmap),
    pattern = "^E.+_"
)


Fig.2c

ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap,
    col = wesanderson::wes_palette("Zissou1", 100, type = "continuous"),
    rect_gp = grid::gpar(col = "white", lwd = 0.1),
    #
    cluster_rows = TRUE,
    cluster_columns = FALSE,
    #
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    heatmap_legend_param = list(
        title = "Z score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(20, "mm"),
        legend_width = unit(8, "mm")
    )
    # heatmap_width = unit(8, "cm")
    # heatmap_height = unit(8, "cm")
)

R session info

devtools::session_info()$platform
##  setting  value                       
##  version  R version 4.0.3 (2020-10-10)
##  os       macOS  11.1                 
##  system   x86_64, darwin20.1.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-12-17


devtools::session_info()$pack %>%
    as_tibble() %>%
    dplyr::select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")
package loadedversion date source
assertthat 0.2.1 2019-03-21 CRAN (R 4.0.0)
backports 1.2.1 2020-12-09 CRAN (R 4.0.3)
BayesFactor 0.9.12-4.2 2018-05-19 CRAN (R 4.0.3)
BiocGenerics 0.36.0 2020-10-27 Bioconductor
broom 0.7.3.9000 2020-12-17 Github (tidymodels/broom@1c00cc8)
Cairo 1.5-12.2 2020-07-07 CRAN (R 4.0.3)
callr 3.5.1.9000 2020-11-23 Github (r-lib/callr@d44e660)
cellranger 1.1.0 2016-07-27 CRAN (R 4.0.0)
checkmate 2.0.0 2020-02-06 CRAN (R 4.0.0)
circlize 0.4.11 2020-10-31 CRAN (R 4.0.3)
cli 2.2.0 2020-11-20 CRAN (R 4.0.3)
clue 0.3-58 2020-12-03 CRAN (R 4.0.3)
cluster 2.1.0 2019-06-19 CRAN (R 4.0.3)
coda 0.19-4 2020-09-30 CRAN (R 4.0.2)
colorspace 2.0-0 2020-11-10 R-Forge (R 4.0.3)
ComplexHeatmap 2.6.2 2020-11-12 Bioconductor
crayon 1.3.4 2017-09-16 CRAN (R 4.0.0)
DBI 1.1.0 2019-12-15 CRAN (R 4.0.0)
dbplyr 2.0.0 2020-11-03 CRAN (R 4.0.3)
desc 1.2.0 2018-05-01 CRAN (R 4.0.0)
devtools 2.3.1.9000 2020-12-05 Github (r-lib/devtools@b3be019)
digest 0.6.27 2020-10-24 CRAN (R 4.0.3)
dplyr 1.0.2.9000 2020-12-14 Github (tidyverse/dplyr@59105eb)
ellipsis 0.3.1 2020-05-15 CRAN (R 4.0.3)
evaluate 0.14 2019-05-28 CRAN (R 4.0.0)
extrafont 0.17 2014-12-08 CRAN (R 4.0.2)
extrafontdb 1.0 2012-06-11 CRAN (R 4.0.0)
fansi 0.4.1 2020-01-08 CRAN (R 4.0.0)
farver 2.0.3 2020-01-16 CRAN (R 4.0.0)
forcats 0.5.0.9000 2020-12-09 Github (tidyverse/forcats@6c5e59f)
fs 1.5.0 2020-07-31 CRAN (R 4.0.3)
generics 0.1.0 2020-10-31 CRAN (R 4.0.3)
GetoptLong 1.0.5 2020-12-15 CRAN (R 4.0.3)
ggplot2 3.3.2.9000 2020-12-14 Github (tidyverse/ggplot2@9deb97b)
ggrepel 0.9.0 2020-12-16 CRAN (R 4.0.3)
ggthemes 4.2.0 2020-12-10 Github (jrnold/ggthemes@fa3d1ac)
GlobalOptions 0.1.2 2020-06-10 CRAN (R 4.0.0)
glue 1.4.2 2020-08-27 CRAN (R 4.0.2)
gt 0.2.2 2020-11-25 Github (rstudio/gt@bae32f4)
gtable 0.3.0 2019-03-25 CRAN (R 4.0.0)
gtools 3.8.2 2020-03-31 CRAN (R 4.0.0)
haven 2.3.1 2020-06-01 CRAN (R 4.0.0)
hms 0.5.3 2020-01-08 CRAN (R 4.0.0)
htmltools 0.5.0.9003 2020-12-03 Github (rstudio/htmltools@d18bd8e)
httr 1.4.2 2020-07-20 CRAN (R 4.0.2)
IRanges 2.24.1 2020-12-12 Bioconductor
jpeg 0.1-8.1 2019-10-24 CRAN (R 4.0.0)
jsonlite 1.7.2 2020-12-09 CRAN (R 4.0.3)
knitr 1.30.4 2020-12-15 Github (yihui/knitr@f8f90ba)
labeling 0.4.2 2020-10-20 CRAN (R 4.0.3)
lattice 0.20-41 2020-04-02 CRAN (R 4.0.3)
lifecycle 0.2.0 2020-03-06 CRAN (R 4.0.0)
lubridate 1.7.9.2 2020-12-17 Github (tidyverse/lubridate@8853a2a)
magick 2.5.2 2020-11-10 CRAN (R 4.0.3)
magrittr 2.0.1.9000 2020-12-14 Github (tidyverse/magrittr@bb1c86a)
Matrix 1.2-18 2019-11-27 CRAN (R 4.0.2)
MatrixModels 0.4-1 2015-08-22 CRAN (R 4.0.0)
matrixStats 0.57.0 2020-09-25 CRAN (R 4.0.2)
memoise 1.1.0 2017-04-21 CRAN (R 4.0.0)
modelr 0.1.8.9000 2020-11-23 Github (tidyverse/modelr@16168e0)
munsell 0.5.0 2018-06-12 CRAN (R 4.0.0)
mvtnorm 1.1-1 2020-06-09 CRAN (R 4.0.2)
patchwork 1.1.1 2020-12-17 CRAN (R 4.0.3)
pbapply 1.4-3 2020-08-18 CRAN (R 4.0.2)
pillar 1.4.7 2020-11-20 CRAN (R 4.0.3)
pkgbuild 1.2.0 2020-12-15 CRAN (R 4.0.3)
pkgconfig 2.0.3 2019-09-22 CRAN (R 4.0.0)
pkgload 1.1.0 2020-05-29 CRAN (R 4.0.0)
png 0.1-7 2013-12-03 CRAN (R 4.0.0)
prettyunits 1.1.1.9000 2020-11-23 Github (r-lib/prettyunits@b1cdad8)
processx 3.4.5 2020-11-30 CRAN (R 4.0.3)
ps 1.5.0 2020-12-05 CRAN (R 4.0.3)
purrr 0.3.4.9000 2020-11-23 Github (tidyverse/purrr@af06d45)
R6 2.5.0 2020-11-02 Github (r-lib/R6@6cf7d4e)
ragg 1.0.0.9000 2020-12-16 Github (r-lib/ragg@cd1a50f)
rappdirs 0.3.1 2016-03-28 CRAN (R 4.0.0)
RColorBrewer 1.1-2 2014-12-07 CRAN (R 4.0.0)
Rcpp 1.0.5 2020-07-06 CRAN (R 4.0.3)
readr 1.4.0.9000 2020-11-23 Github (tidyverse/readr@97186a8)
readxl 1.3.1.9000 2020-11-23 Github (tidyverse/readxl@9f85fa5)
remotes 2.2.0.9000 2020-12-03 Github (r-lib/remotes@5d0d9fd)
reprex 0.3.0 2019-05-16 CRAN (R 4.0.0)
reticulate 1.18 2020-10-25 CRAN (R 4.0.3)
rjson 0.2.20 2018-06-08 CRAN (R 4.0.0)
rlang 0.4.9.9000 2020-12-17 Github (r-lib/rlang@b9b7ae2)
rmarkdown 2.6.0001 2020-12-17 Github (rstudio/rmarkdown@7f3cd84)
rprojroot 2.0.2 2020-11-15 CRAN (R 4.0.3)
rstudioapi 0.13 2020-11-12 CRAN (R 4.0.3)
Rttf2pt1 1.3.8 2020-01-10 CRAN (R 4.0.0)
rvest 0.3.6 2020-07-25 CRAN (R 4.0.2)
S4Vectors 0.28.1 2020-12-09 Bioconductor
sass 0.2.0.9005 2020-12-01 Github (rstudio/sass@1dda864)
scales 1.1.1 2020-05-11 CRAN (R 4.0.3)
sessioninfo 1.1.1 2018-11-05 CRAN (R 4.0.3)
shape 1.4.5 2020-09-13 CRAN (R 4.0.2)
stringi 1.5.3 2020-09-09 CRAN (R 4.0.2)
stringr 1.4.0.9000 2020-11-23 Github (tidyverse/stringr@1f03eb0)
styler 1.3.2.9000 2020-11-30 Github (r-lib/styler@d5b8b0e)
systemfonts 0.3.2.9000 2020-12-17 Github (r-lib/systemfonts@d959a15)
testthat 3.0.1 2020-12-17 Github (r-lib/testthat@e99155a)
textshaping 0.2.1.9000 2020-12-17 Github (r-lib/textshaping@46c74ee)
tibble 3.0.4.9000 2020-12-13 Github (tidyverse/tibble@5881b43)
tidyr 1.1.2.9000 2020-12-09 Github (tidyverse/tidyr@581b488)
tidyselect 1.1.0 2020-05-11 CRAN (R 4.0.3)
tidyverse 1.3.0.9000 2020-11-23 Github (hadley/tidyverse@8a0bb99)
usethis 2.0.0.9000 2020-12-13 Github (r-lib/usethis@45d0fef)
vctrs 0.3.6 2020-12-17 CRAN (R 4.0.3)
viridisLite 0.3.0 2018-02-01 CRAN (R 4.0.0)
wesanderson 0.3.6 2018-04-20 CRAN (R 4.0.3)
withr 2.3.0 2020-09-22 CRAN (R 4.0.2)
xfun 0.19 2020-10-30 CRAN (R 4.0.3)
xml2 1.3.2 2020-04-23 CRAN (R 4.0.0)
yaml 2.2.1 2020-02-01 CRAN (R 4.0.0)
yarrr 0.1.5 2017-04-19 CRAN (R 4.0.3)