Zhou, Y., Song, W.M., Andhey, P.S., Swain, A., Levy, T., Miller, K.R., Poliani, P.L., Cominelli, M., Grover, S., Gilfillan, S., et al. (2020). Human and mouse single-nucleus transcriptomics reveal TREM2-dependent and TREM2-independent cellular responses in Alzheimer’s disease. Nat. Med. 26, 131–142.

BioProject Accession: PRJNA590042
GEO Accession: GSE140510

Load required packages.

library(tidyverse)
library(magrittr)
library(Matrix)
library(extrafont)
library(ggrepel)
library(patchwork)
# library(tidylog)

Sys.time()

## [1] "2020-12-17 19:25:41 CST"

Data preparation

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

load_matrix <- function(x) {
    matrix_readcount_use <- scipy.sparse$load_npz(
        file.path(x, "matrix_readcount.npz")
    )

    colnames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_barcodes.npy")
    )

    rownames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_features.npy")
    )

    return(matrix_readcount_use)
}

Data loading

PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Collaborations/Microglia/raw/public/PRJNA590042/remapped"

Matrix

np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)

matrix_dir <- list(
    "matrices"
)

matrix_readcount_use <- purrr::map(matrix_dir, function(x) {
    load_matrix(file.path(PROJECT_DIR, x))
}) %>%
    purrr::reduce(cbind)

walk(list(matrix_readcount_use, matrix_dir), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})

## 3.4 GB
## 176 B

# clean up
rm(matrix_dir)

Embedding

EMBEDDING_FILE <- "embedding_ncomponents35_ccc1_seed20200416.csv.gz"

embedding <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "clustering",
        "exploring",
        EMBEDDING_FILE
    )
)

# clean up
rm(EMBEDDING_FILE)

Metadata

cell_metadata_PRJNA590042 <- read_delim(
    file.path(
        PROJECT_DIR,
        "raw/",
        "SraRunTable.csv"
    ),
    delim = ","
) %>%
    select(
        run = Run,
        age = Age,
        sample_name = `Sample Name`,
        mouse_genotype,
        source_name
    )

embedding %<>%
    left_join(
        cell_metadata_PRJNA590042 %>%
            select(sample_name, genotype = mouse_genotype),
        by = c("batch" = "sample_name")
    ) %>%
    mutate(
        genotype = factor(
            genotype,
            levels = c("wt", "5XFAD", "Trem2-/-", "Trem2-/- 5XFAD")
        ) %>% fct_recode(WT = "wt")
    )

cell_metadata_PRJNA590042

Distinguishing major brain-cell types

Summary

embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(louvain) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

## `summarise()` has ungrouped output. You can override using the `.groups` argument.

louvain	num_cells	median_umis	median_genes
0	8953	2509.0	1439.0
1	7856	14239.0	4261.0
2	6987	13770.0	4518.0
3	6372	11030.0	3962.5
4	6252	13780.0	4370.0
5	5856	701.5	547.0
6	5070	18558.5	5025.0
7	5029	16350.0	4841.0
8	4479	3236.0	1718.0
9	3972	731.0	538.0
10	3906	4403.0	2246.0
11	3696	2704.0	1721.0
12	2964	11285.0	4214.5
13	2484	12135.5	4364.5
14	2316	8534.5	3514.5
15	2215	5280.0	2586.0
16	1752	23367.5	5638.0
17	1740	10955.5	4105.5
18	1610	6057.5	2650.0
19	996	7048.0	3065.0
20	906	10770.5	3921.5
21	814	5551.0	2565.0
22	682	16883.5	4925.0
23	584	1835.0	1225.5
24	537	884.0	640.0
25	528	2872.0	1742.0
26	474	3503.0	2050.5
27	312	4221.5	2078.0
28	238	2310.5	1524.5
29	134	13254.0	4724.0
30	112	5206.0	2697.0
31	47	9194.0	3847.0

embedding %>%
    mutate(
        num_umis = colSums(matrix_readcount_use[, cell]),
        num_genes = colSums(matrix_readcount_use[, cell] > 0)
    ) %>%
    group_by(genotype) %>%
    summarise(
        num_cells = n(),
        median_umis = median(num_umis),
        median_genes = median(num_genes)
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

## `summarise()` has ungrouped output. You can override using the `.groups` argument.

genotype	num_cells	median_umis	median_genes
WT	24034	7677.0	3313.5
5XFAD	22069	5824.0	2633.0
Trem2-/-	22884	7802.0	3325.0
Trem2-/- 5XFAD	20886	8696.5	3472.5

purrr::map(sort(unique(embedding$louvain)), function(x) {
    cells_in_group <- embedding %>%
        filter(louvain == x) %>%
        pull(cell)

    colSums(matrix_readcount_use[, cells_in_group]) %>%
        enframe(name = "cell") %>%
        mutate(group = x)
}) %>%
    dplyr::bind_rows() %>%
    mutate(
        group = factor(
            group,
            levels = sort(unique(embedding$louvain)) %>% rev()
        ),
        category = "UMI distribution"
    ) %>%
    plot_violin_umi(
        x = value,
        y = group,
        z = "category"
    ) +
    # match cluster colors
    ggplot2::scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    ) +
    ggplot2::scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    )

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "t-SNE"

x_column <- "x_tsne"
y_column <- "y_tsne"

Clustering & UMI & batch & genotype

p_embedding_cluster <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.3,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain)))
    ) +
    # labs(color = NULL) +
    customized_theme()

p_embedding_batch <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.15,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.83, 0.275)
p_embedding_umi <- plot_embedding_value(
    embedding = embedding[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.4,
    sort_values = TRUE,
    FUN = function(x) x
)

p_embedding_genotype <- plot_embedding(
    embedding = embedding[, c(x_column, y_column)],
    color_values = embedding$genotype,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Genotype"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.15,
    sort_values = FALSE
) +
    scale_color_manual(
        # values = gg_color_hue(n = length(unique(embedding$genotype)))
        values = yarrr::piratepal(palette = "google") %>% as.character()
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

Fig.1b

list(
    p_embedding_cluster,
    p_embedding_umi,
    p_embedding_genotype,
    p_embedding_batch
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Batch; separated

purrr::map(sort(unique(embedding$batch)), function(x) {
    plot_embedding(
        embedding = embedding[, c(x_column, y_column)],
        color_values = as.numeric(embedding$batch == x) %>% as.factor(),
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        label_position = NULL,
        show_color_value_labels = FALSE,
        show_color_legend = FALSE,
        geom_point_size = 0.18,
        sort_values = TRUE
    ) +
        scale_color_manual(
            values = c("grey70", "salmon")
        )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 3) +
    plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})

# clean up
gc()

## 3.4 GB
##             used   (Mb) gc trigger    (Mb) limit (Mb)   max used    (Mb)
## Ncells   3176585  169.7    6132278   327.5         NA    4144918   221.4
## Vcells 728736863 5559.9 1490893404 11374.7      16384 1464604784 11174.1

Embedding

FEATURES_SELECTED <- c(
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000036887_C1qa"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

grep(pattern = "itm2a", x = rownames(matrix_cpm_use), ignore.case = TRUE, value = TRUE)

FEATURES_SELECTED <- c(
    # Pan-neuronal
    "ENSMUSG00000026959_Grin1",
    "ENSMUSG00000025576_Rbfox3",
    # Excitatory neuron
    "ENSMUSG00000070570_Slc17a7",
    "ENSMUSG00000038331_Satb2",
    # Astrocyte
    "ENSMUSG00000024411_Aqp4",
    "ENSMUSG00000050953_Gja1",
    # Oligodendrocyte
    "ENSMUSG00000076439_Mog",
    "ENSMUSG00000037625_Cldn11",
    # OPC
    "ENSMUSG00000029231_Pdgfra",
    "ENSMUSG00000021614_Vcan",
    # Endothelial cell
    "ENSMUSG00000017344_Vtn",
    "ENSMUSG00000031239_Itm2a"
)

FEATURES_SELECTED_ANNOTATION <- c(
    "Pan-neuronal", "Excitatory neuron", "Astrocyte",
    "Oligodendrocyte", "OPC", "Endothelial cell"
) %>%
    rep(each = 2)

## [1] "ENSMUSG00000031239_Itm2a"

ED Fig.1b

# cell type specific markers
purrr::map2(FEATURES_SELECTED, FEATURES_SELECTED_ANNOTATION, function(x, y) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste(
            paste0(EMBEDDING_TITLE_PREFIX, "; ", x %>% str_remove("^E.+_")),
            y,
            sep = "; "
        ),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

ED Fig.1b

# Inhibitory neuron
FEATURES_SELECTED <- c(
    "ENSMUSG00000026837_Col5a1",
    "ENSMUSG00000070880_Gad1",
    "ENSMUSG00000061762_Tac1",
    "ENSMUSG00000004366_Sst",
    "ENSMUSG00000041592_Sdk2",
    "ENSMUSG00000026787_Gad2",
    "ENSMUSG00000045573_Penk",
    "ENSMUSG00000029819_Npy"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste(
            EMBEDDING_TITLE_PREFIX,
            x %>% str_remove("^E.+_"),
            "Inhibitory neuron",
            sep = "; "
        ),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.4,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

Lollipop

clusters_selected_lollipop <- sort(unique(embedding$louvain))
cells_selected_lollipop <- lapply(clusters_selected_lollipop, function(x) {
    embedding$cell[embedding$louvain == x]
})
names(cells_selected_lollipop) <- clusters_selected_lollipop

FEATURES_LOLLIPOP <- c(
    "ENSMUSG00000026959_Grin1",
    "ENSMUSG00000035864_Syt1",
    "ENSMUSG00000025576_Rbfox3",
    "ENSMUSG00000027273_Snap25",
    "ENSMUSG00000059003_Grin2a",
    "ENSMUSG00000070570_Slc17a7",
    "ENSMUSG00000038331_Satb2",
    "ENSMUSG00000070880_Gad1",
    "ENSMUSG00000026787_Gad2",
    "ENSMUSG00000004366_Sst",
    "ENSMUSG00000029819_Npy",
    "ENSMUSG00000031425_Plp1",
    "ENSMUSG00000041607_Mbp",
    "ENSMUSG00000037625_Cldn11",
    "ENSMUSG00000076439_Mog",
    "ENSMUSG00000024617_Camk2a",
    "ENSMUSG00000032532_Cck",
    "ENSMUSG00000033161_Atp1a1",
    "ENSMUSG00000023868_Pde10a",
    "ENSMUSG00000061762_Tac1",
    "ENSMUSG00000045573_Penk",
    "ENSMUSG00000041592_Sdk2",
    "ENSMUSG00000026837_Col5a1",
    "ENSMUSG00000025582_Nptx1",
    "ENSMUSG00000059173_Pde1a",
    "ENSMUSG00000036617_Etl4",
    "ENSMUSG00000005089_Slc1a2",
    "ENSMUSG00000050953_Gja1",
    "ENSMUSG00000024411_Aqp4",
    "ENSMUSG00000021665_Hexb",
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000036887_C1qa",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000029231_Pdgfra",
    "ENSMUSG00000021614_Vcan",
    "ENSMUSG00000032911_Cspg4",
    "ENSMUSG00000046160_Olig1",
    "ENSMUSG00000017344_Vtn",
    "ENSMUSG00000029648_Flt1",
    "ENSMUSG00000041378_Cldn5"
)

p_dotplot_features_selected <- plot_lollipop(
    cells = cells_selected_lollipop,
    features = FEATURES_LOLLIPOP,
    matrix_cpm = matrix_cpm_use
)

p_dotplot_features_selected

Violin

FEATURES_VIOLIN <- c(
    "ENSMUSG00000005533_Igf1r",
    "ENSMUSG00000022265_Ank",
    "ENSMUSG00000002985_Apoe",
    "ENSMUSG00000029207_Apbb2",
    "ENSMUSG00000021109_Hif1a",
    "ENSMUSG00000007891_Ctsd",
    "ENSMUSG00000023992_Trem2"
)

plot_violin(
    cells = cells_selected_lollipop,
    features = FEATURES_VIOLIN,
    matrix_cpm = matrix_cpm_use,
    x_range_breaks = NULL
) +
    # match cluster colors
    ggplot2::scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    ) +
    ggplot2::scale_fill_manual(
        values = gg_color_hue(n = length(unique(embedding$louvain))) %>% rev(.)
    )

Cluster composition

p_barplot_cluster_composition_batch <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )

` Fig.1e

p_barplot_cluster_composition_batch

p_barplot_cluster_composition_genotype <- prepare_cluster_composition(
    embedding = embedding,
    x = louvain,
    group = genotype
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = genotype
    ) +
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character()
    )

p_barplot_cluster_composition_genotype

Subclustering microglia

embedding_microglia <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "clustering",
        "subclustering",
        "exploring",
        "embedding_ncomponents8_ccc1_seed20200416.csv.gz"
    )
) %>%
    left_join(
        cell_metadata_PRJNA590042 %>%
            select(sample_name, genotype = mouse_genotype),
        by = c("batch" = "sample_name")
    ) %>%
    mutate(
        genotype = factor(
            genotype,
            levels = c("wt", "5XFAD", "Trem2-/-", "Trem2-/- 5XFAD")
        ) %>% fct_recode(WT = "wt")
    )

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )

matrix_readcount_use <- matrix_readcount_use[, embedding_microglia$cell]
matrix_cpm_use <- calc_cpm(matrix_readcount_use)

walk(list(matrix_readcount_use, matrix_cpm_use), function(x) {
    print(object.size(x), units = "auto", standard = "SI")
})

## 95.5 MB
## 95.5 MB

Embedding visualization

EMBEDDING_TITLE_PREFIX <- "t-SNE"

x_column <- "x_tsne"
y_column <- "y_tsne"

Clustering & UMI & batch & genotype

p_embedding_microglia_cluster <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$louvain %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Cluster"),
    label_position = NULL,
    show_color_value_labels = TRUE,
    show_color_legend = FALSE,
    geom_point_size = 0.6,
    sort_values = FALSE
) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_microglia$louvain))
        )
    ) +
    # labs(color = NULL) +
    customized_theme()

p_embedding_microglia_batch <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$batch %>% as.factor(),
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Batch"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.35,
    sort_values = FALSE
) +
    scale_color_manual(
        values = gg_color_hue(n = length(unique(embedding_microglia$batch)))
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

CB_POSITION <- c(0.83, 0.275)
p_embedding_microglia_umi <- plot_embedding_value(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = log10(
        Matrix::colSums(matrix_readcount_use[, embedding_microglia$cell]) + 1
    ),
    colorbar_position = CB_POSITION,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; UMI"),
    label_position = NULL,
    geom_point_size = 0.6,
    sort_values = TRUE,
    FUN = function(x) x
)

p_embedding_microglia_genotype <- plot_embedding(
    embedding = embedding_microglia[, c(x_column, y_column)],
    color_values = embedding_microglia$genotype,
    label = paste0(EMBEDDING_TITLE_PREFIX, "; Genotype"),
    label_position = NULL,
    show_color_value_labels = FALSE,
    show_color_legend = TRUE,
    geom_point_size = 0.4,
    sort_values = FALSE
) +
    scale_color_manual(
        # values = gg_color_hue(n = length(unique(embedding$genotype)))
        values = yarrr::piratepal(palette = "google") %>% as.character()
    ) +
    guides(colour = guide_legend(override.aes = list(size = 3), ncol = 1)) +
    labs(color = NULL) +
    customized_theme()

Fig.2e

list(
    p_embedding_microglia_cluster,
    p_embedding_microglia_umi,
    p_embedding_microglia_genotype,
    p_embedding_microglia_batch
) %>%
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = theme(plot.margin = margin())
    )

Expression

Embedding

Fig.2g

FEATURES_SELECTED <- c(
    "ENSMUSG00000024621_Csf1r",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000036887_C1qa"
)

purrr::map(FEATURES_SELECTED, function(x) {
    selected_feature <- x

    plot_embedding_value(
        embedding = embedding_microglia[, c(x_column, y_column)],
        color_values = log10(
            matrix_cpm_use[selected_feature, embedding_microglia$cell] + 1
        ),
        colorbar_position = CB_POSITION,
        label = paste0(EMBEDDING_TITLE_PREFIX, "; ", x),
        # label_position = c(x_label, y_label),
        geom_point_size = 0.6,
        sort_values = TRUE,
        FUN = function(x) x
    )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(nrow = 2) +
    plot_annotation(theme = theme(plot.margin = margin()))

Cluster composition

Batch

p_barplot_cluster_composition_batch <- prepare_cluster_composition(
    embedding = embedding_microglia,
    x = louvain,
    group = batch
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = batch
    )

## `summarise()` has ungrouped output. You can override using the `.groups` argument.

## `summarise()` has grouped output by 'louvain'. You can override using the `.groups` argument.

# colorspace::scale_fill_discrete_qualitative(palette = "Dark 3")

p_barplot_cluster_composition_batch

Genotype

p_barplot_cluster_composition_genotype <- prepare_cluster_composition(
    embedding = embedding_microglia,
    x = louvain,
    group = genotype
) %>%
    mutate(
        louvain = as.factor(louvain)
    ) %>%
    plot_barplot(
        x = louvain,
        y = percentage,
        z = genotype
    ) +
    scale_fill_manual(
        values = yarrr::piratepal(palette = "google") %>% as.character()
    )

p_barplot_cluster_composition_genotype

Genotype; reversed

Fig.2f

prepare_cluster_composition(
    embedding = embedding_microglia,
    x = genotype,
    group = louvain
) %>%
    arrange(louvain) %>%
    mutate(
        louvain = factor(louvain),
        genotype = fct_rev(genotype)
    ) %>%
    plot_barplot(x = genotype, y = percentage, z = louvain) +
    coord_flip() +
    scale_fill_manual(
        values = ggthemes::tableau_color_pal("Tableau 20")(
            length(unique(embedding_microglia$louvain))
        )
    )

Heatmap

FEATURES_HEATMAP <- c(
    "ENSMUSG00000018126_Baiap2l2",
    "ENSMUSG00000068129_Cst7",
    "ENSMUSG00000014599_Csf1",
    "ENSMUSG00000015568_Lpl",
    # "ENSMUSG00000062593_Lilrb4a",
    "ENSMUSG00000030789_Itgax",
    "ENSMUSG00000005533_Igf1r",
    "ENSMUSG00000000982_Ccl3",
    "ENSMUSG00000050370_Ch25h",
    "ENSMUSG00000022265_Ank",
    "ENSMUSG00000002602_Axl",
    "ENSMUSG00000024610_Cd74",
    "ENSMUSG00000002985_Apoe",
    "ENSMUSG00000069516_Lyz2",
    "ENSMUSG00000029207_Apbb2",
    "ENSMUSG00000021109_Hif1a",
    "ENSMUSG00000018927_Ccl6",
    "ENSMUSG00000029816_Gpnmb",
    "ENSMUSG00000025351_Cd63",
    "ENSMUSG00000073411_H2-D1",
    "ENSMUSG00000007891_Ctsd",
    "ENSMUSG00000023992_Trem2",
    "ENSMUSG00000052336_Cx3cr1",
    "ENSMUSG00000033192_Lpcat2",
    "ENSMUSG00000048163_Selplg",
    "ENSMUSG00000036353_P2ry12",
    "ENSMUSG00000054675_Tmem119",
    "ENSMUSG00000079227_Ccr5",
    "ENSMUSG00000029343_Crybb1"
)

matrix_heatmap <- sapply(sort(unique(embedding_microglia$batch)), function(x) {
    cells_in_group <- embedding_microglia %>%
        filter(batch == x) %>%
        pull(cell)

    Matrix::rowMeans(matrix_cpm_use[FEATURES_HEATMAP, cells_in_group])
})

matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))
rownames(matrix_heatmap) <- str_remove(
    string = rownames(matrix_heatmap),
    pattern = "^E.+_"
)

Fig.2c

ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap,
    col = wesanderson::wes_palette("Zissou1", 100, type = "continuous"),
    rect_gp = grid::gpar(col = "white", lwd = 0.1),
    #
    cluster_rows = TRUE,
    cluster_columns = FALSE,
    #
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    heatmap_legend_param = list(
        title = "Z score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(20, "mm"),
        legend_width = unit(8, "mm")
    )
    # heatmap_width = unit(8, "cm")
    # heatmap_height = unit(8, "cm")
)

R session info

devtools::session_info()$platform

##  setting  value                       
##  version  R version 4.0.3 (2020-10-10)
##  os       macOS  11.1                 
##  system   x86_64, darwin20.1.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-12-17

devtools::session_info()$pack %>%
    as_tibble() %>%
    dplyr::select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

package	loadedversion	date	source
assertthat	0.2.1	2019-03-21	CRAN (R 4.0.0)
backports	1.2.1	2020-12-09	CRAN (R 4.0.3)
BayesFactor	0.9.12-4.2	2018-05-19	CRAN (R 4.0.3)
BiocGenerics	0.36.0	2020-10-27	Bioconductor
broom	0.7.3.9000	2020-12-17	Github (tidymodels/broom@1c00cc8)
Cairo	1.5-12.2	2020-07-07	CRAN (R 4.0.3)
callr	3.5.1.9000	2020-11-23	Github (r-lib/callr@d44e660)
cellranger	1.1.0	2016-07-27	CRAN (R 4.0.0)
checkmate	2.0.0	2020-02-06	CRAN (R 4.0.0)
circlize	0.4.11	2020-10-31	CRAN (R 4.0.3)
cli	2.2.0	2020-11-20	CRAN (R 4.0.3)
clue	0.3-58	2020-12-03	CRAN (R 4.0.3)
cluster	2.1.0	2019-06-19	CRAN (R 4.0.3)
coda	0.19-4	2020-09-30	CRAN (R 4.0.2)
colorspace	2.0-0	2020-11-10	R-Forge (R 4.0.3)
ComplexHeatmap	2.6.2	2020-11-12	Bioconductor
crayon	1.3.4	2017-09-16	CRAN (R 4.0.0)
DBI	1.1.0	2019-12-15	CRAN (R 4.0.0)
dbplyr	2.0.0	2020-11-03	CRAN (R 4.0.3)
desc	1.2.0	2018-05-01	CRAN (R 4.0.0)
devtools	2.3.1.9000	2020-12-05	Github (r-lib/devtools@b3be019)
digest	0.6.27	2020-10-24	CRAN (R 4.0.3)
dplyr	1.0.2.9000	2020-12-14	Github (tidyverse/dplyr@59105eb)
ellipsis	0.3.1	2020-05-15	CRAN (R 4.0.3)
evaluate	0.14	2019-05-28	CRAN (R 4.0.0)
extrafont	0.17	2014-12-08	CRAN (R 4.0.2)
extrafontdb	1.0	2012-06-11	CRAN (R 4.0.0)
fansi	0.4.1	2020-01-08	CRAN (R 4.0.0)
farver	2.0.3	2020-01-16	CRAN (R 4.0.0)
forcats	0.5.0.9000	2020-12-09	Github (tidyverse/forcats@6c5e59f)
fs	1.5.0	2020-07-31	CRAN (R 4.0.3)
generics	0.1.0	2020-10-31	CRAN (R 4.0.3)
GetoptLong	1.0.5	2020-12-15	CRAN (R 4.0.3)
ggplot2	3.3.2.9000	2020-12-14	Github (tidyverse/ggplot2@9deb97b)
ggrepel	0.9.0	2020-12-16	CRAN (R 4.0.3)
ggthemes	4.2.0	2020-12-10	Github (jrnold/ggthemes@fa3d1ac)
GlobalOptions	0.1.2	2020-06-10	CRAN (R 4.0.0)
glue	1.4.2	2020-08-27	CRAN (R 4.0.2)
gt	0.2.2	2020-11-25	Github (rstudio/gt@bae32f4)
gtable	0.3.0	2019-03-25	CRAN (R 4.0.0)
gtools	3.8.2	2020-03-31	CRAN (R 4.0.0)
haven	2.3.1	2020-06-01	CRAN (R 4.0.0)
hms	0.5.3	2020-01-08	CRAN (R 4.0.0)
htmltools	0.5.0.9003	2020-12-03	Github (rstudio/htmltools@d18bd8e)
httr	1.4.2	2020-07-20	CRAN (R 4.0.2)
IRanges	2.24.1	2020-12-12	Bioconductor
jpeg	0.1-8.1	2019-10-24	CRAN (R 4.0.0)
jsonlite	1.7.2	2020-12-09	CRAN (R 4.0.3)
knitr	1.30.4	2020-12-15	Github (yihui/knitr@f8f90ba)
labeling	0.4.2	2020-10-20	CRAN (R 4.0.3)
lattice	0.20-41	2020-04-02	CRAN (R 4.0.3)
lifecycle	0.2.0	2020-03-06	CRAN (R 4.0.0)
lubridate	1.7.9.2	2020-12-17	Github (tidyverse/lubridate@8853a2a)
magick	2.5.2	2020-11-10	CRAN (R 4.0.3)
magrittr	2.0.1.9000	2020-12-14	Github (tidyverse/magrittr@bb1c86a)
Matrix	1.2-18	2019-11-27	CRAN (R 4.0.2)
MatrixModels	0.4-1	2015-08-22	CRAN (R 4.0.0)
matrixStats	0.57.0	2020-09-25	CRAN (R 4.0.2)
memoise	1.1.0	2017-04-21	CRAN (R 4.0.0)
modelr	0.1.8.9000	2020-11-23	Github (tidyverse/modelr@16168e0)
munsell	0.5.0	2018-06-12	CRAN (R 4.0.0)
mvtnorm	1.1-1	2020-06-09	CRAN (R 4.0.2)
patchwork	1.1.1	2020-12-17	CRAN (R 4.0.3)
pbapply	1.4-3	2020-08-18	CRAN (R 4.0.2)
pillar	1.4.7	2020-11-20	CRAN (R 4.0.3)
pkgbuild	1.2.0	2020-12-15	CRAN (R 4.0.3)
pkgconfig	2.0.3	2019-09-22	CRAN (R 4.0.0)
pkgload	1.1.0	2020-05-29	CRAN (R 4.0.0)
png	0.1-7	2013-12-03	CRAN (R 4.0.0)
prettyunits	1.1.1.9000	2020-11-23	Github (r-lib/prettyunits@b1cdad8)
processx	3.4.5	2020-11-30	CRAN (R 4.0.3)
ps	1.5.0	2020-12-05	CRAN (R 4.0.3)
purrr	0.3.4.9000	2020-11-23	Github (tidyverse/purrr@af06d45)
R6	2.5.0	2020-11-02	Github (r-lib/R6@6cf7d4e)
ragg	1.0.0.9000	2020-12-16	Github (r-lib/ragg@cd1a50f)
rappdirs	0.3.1	2016-03-28	CRAN (R 4.0.0)
RColorBrewer	1.1-2	2014-12-07	CRAN (R 4.0.0)
Rcpp	1.0.5	2020-07-06	CRAN (R 4.0.3)
readr	1.4.0.9000	2020-11-23	Github (tidyverse/readr@97186a8)
readxl	1.3.1.9000	2020-11-23	Github (tidyverse/readxl@9f85fa5)
remotes	2.2.0.9000	2020-12-03	Github (r-lib/remotes@5d0d9fd)
reprex	0.3.0	2019-05-16	CRAN (R 4.0.0)
reticulate	1.18	2020-10-25	CRAN (R 4.0.3)
rjson	0.2.20	2018-06-08	CRAN (R 4.0.0)
rlang	0.4.9.9000	2020-12-17	Github (r-lib/rlang@b9b7ae2)
rmarkdown	2.6.0001	2020-12-17	Github (rstudio/rmarkdown@7f3cd84)
rprojroot	2.0.2	2020-11-15	CRAN (R 4.0.3)
rstudioapi	0.13	2020-11-12	CRAN (R 4.0.3)
Rttf2pt1	1.3.8	2020-01-10	CRAN (R 4.0.0)
rvest	0.3.6	2020-07-25	CRAN (R 4.0.2)
S4Vectors	0.28.1	2020-12-09	Bioconductor
sass	0.2.0.9005	2020-12-01	Github (rstudio/sass@1dda864)
scales	1.1.1	2020-05-11	CRAN (R 4.0.3)
sessioninfo	1.1.1	2018-11-05	CRAN (R 4.0.3)
shape	1.4.5	2020-09-13	CRAN (R 4.0.2)
stringi	1.5.3	2020-09-09	CRAN (R 4.0.2)
stringr	1.4.0.9000	2020-11-23	Github (tidyverse/stringr@1f03eb0)
styler	1.3.2.9000	2020-11-30	Github (r-lib/styler@d5b8b0e)
systemfonts	0.3.2.9000	2020-12-17	Github (r-lib/systemfonts@d959a15)
testthat	3.0.1	2020-12-17	Github (r-lib/testthat@e99155a)
textshaping	0.2.1.9000	2020-12-17	Github (r-lib/textshaping@46c74ee)
tibble	3.0.4.9000	2020-12-13	Github (tidyverse/tibble@5881b43)
tidyr	1.1.2.9000	2020-12-09	Github (tidyverse/tidyr@581b488)
tidyselect	1.1.0	2020-05-11	CRAN (R 4.0.3)
tidyverse	1.3.0.9000	2020-11-23	Github (hadley/tidyverse@8a0bb99)
usethis	2.0.0.9000	2020-12-13	Github (r-lib/usethis@45d0fef)
vctrs	0.3.6	2020-12-17	CRAN (R 4.0.3)
viridisLite	0.3.0	2018-02-01	CRAN (R 4.0.0)
wesanderson	0.3.6	2018-04-20	CRAN (R 4.0.3)
withr	2.3.0	2020-09-22	CRAN (R 4.0.2)
xfun	0.19	2020-10-30	CRAN (R 4.0.3)
xml2	1.3.2	2020-04-23	CRAN (R 4.0.0)
yaml	2.2.1	2020-02-01	CRAN (R 4.0.0)
yarrr	0.1.5	2017-04-19	CRAN (R 4.0.3)

Single-nucleus transcriptomic analysis of mouse brain

JD

2020-12-17

Data preparation

Functions loading

Data loading

Matrix

Embedding

Metadata

Distinguishing major brain-cell types

Summary

Embedding visualization

Clustering & UMI & batch & genotype

Batch; separated

Expression

Embedding

Lollipop

Violin

Cluster composition

Subclustering microglia

Embedding visualization

Clustering & UMI & batch & genotype

Expression

Embedding

Cluster composition

Batch

Genotype

Genotype; reversed

Heatmap

R session info