Accurate Classification of Cardiomyopathy Diagnosis by Chromatin Accessibility

Samadrita Bhattacharyya; Jialei Duan; Ryan J. Vela; Minoti Bhakta; Pietro Bajona; Pradeep P.A. Mammen; Gary C. Hon; Nikhil V. Munshi

doi:10.1161/CIRCULATIONAHA.122.059659

Accurate Classification of Cardiomyopathy Diagnosis by Chromatin Accessibility

Author

Affiliation

Jialei Duan

UT Southwestern Medical Center

Published

Sat Aug 12, 2023 07:24:42-05:00

Abstract

This study establishes proof-of-concept for a cardiomyopathy diagnostic algorithm using chromatin accessibility signatures at a sequencing depth achievable by benchtop instruments.

Sys.time()

[1] "2023-08-12 07:23:34 CDT"

Sys.timezone()

[1] "America/Chicago"

Preparation

PROJECT_DIR <- file.path(
    "/Users/jialei/Dropbox/Data/Projects/UTSW",
    "/Cardiomyopathy/atac-seq"
)

Functions

Load required packages.

library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0.9000 ──
## ✔ dplyr     1.1.2.9000     ✔ readr     2.1.4.9000
## ✔ forcats   1.0.0.9000     ✔ stringr   1.5.0.9000
## ✔ ggplot2   3.4.2.9000     ✔ tibble    3.2.1.9005
## ✔ lubridate 1.9.2.9000     ✔ tidyr     1.3.0.9000
## ✔ purrr     1.0.2.9000     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Matrix)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(patchwork)
library(extrafont)
## Registering fonts with R

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

`%+replace%` <- ggplot2::`%+replace%`

Python env

np <- reticulate::import("numpy", convert = TRUE)
cat("numpy version:", np$`__version__`, "\n")

numpy version: 1.24.3

reticulate::py_config()

python:         /Users/jialei/.pyenv/shims/python
libpython:      /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/libpython3.10.dylib
pythonhome:     /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3:/Users/jialei/.pyenv/versions/mambaforge-22.9.0-3
version:        3.10.9 | packaged by conda-forge | (main, Feb  2 2023, 20:26:08) [Clang 14.0.6 ]
numpy:          /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
numpy_version:  1.24.3
numpy:          /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy

NOTE: Python version was forced by RETICULATE_PYTHON

Differentially accessible regions

# define function
compute_diff_peaks <- function(matrix,
                               group_a,
                               group_b,
                               num_thread = 4) {
    cat(length(group_a), "\n")
    cat(length(group_b), "\n")

    cts <- matrix[, c(
        group_a,
        group_b
    )]
    cat(ncol(cts), "\n")

    col_data <- data.frame(
        sample = c(group_a, group_b),
        category = c(
            rep("group_a", length(group_a)),
            rep("group_b", length(group_b))
        )
    )

    BiocParallel::register(BiocParallel::MulticoreParam(num_thread))

    dds <- DESeq2::DESeqDataSetFromMatrix(
        countData = cts,
        colData = col_data,
        design = ~category
    )

    dds <- DESeq2::DESeq(dds)
    res <- DESeq2::results(dds)
    DESeq2::summary(res)

    return(res)
}

# sample metadata
samples_fresh_healthy <- c(
    "F1_1", "F1_2", "F2_1", "F2_2",
    "F5_1", "F5_2", "P3_1", "P3_2",
    "P5_1", "P5_2", "P6_1", "P6_2"
)

samples_fresh_icm <- c(
    "P104a_1", "P104a_2", "P117b_1", "P117b_2",
    "P123b_1", "P123b_2", "P131a_1", "P131a_2",
    "P92a_1", "P92a_2"
)

samples_fresh_nicm <- c(
    "P114b_1", "P114b_2", "P59a_1", "P59a_2",
    "P60a_1", "P60a_2", "P73a_1", "P73a_2",
    "P87a_1", "P87a_2"
)

samples_fresh_hcm <- c(
    "HOCM4_1", "HOCM4_2", "HOCM6_1", "HOCM6_2",
    "HOCM9_1", "HOCM9_2", "HOCM11_1", "HOCM11_2",
    "HOCM7_1", "HOCM7_2"
)

samples <- list(
    fresh_healthy = samples_fresh_healthy,
    fresh_icm = samples_fresh_icm,
    fresh_nicm = samples_fresh_nicm,
    fresh_hcm = samples_fresh_hcm
)

Pairwise comparison

samples_combn <- combinat::combn(x = names(samples), m = 2)

diff_peaks <- purrr::map(seq_len(ncol(samples_combn)), \(x) {
    sample_names_a <- samples[[samples_combn[, x][1]]]
    sample_names_b <- samples[[samples_combn[, x][2]]]

    cat(length(sample_names_a), "\n")
    cat(sample_names_a, "\n")
    cat(length(sample_names_b), "\n")
    cat(sample_names_b, "\n")

    compute_diff_peaks(
        matrix = matrix_readcount_use,
        group_a = sample_names_a,
        group_b = sample_names_b,
        num_thread = parallel::detectCores() - 1
    )
})

12 
F1_1 F1_2 F2_1 F2_2 F5_1 F5_2 P3_1 P3_2 P5_1 P5_2 P6_1 P6_2 
10 
P104a_1 P104a_2 P117b_1 P117b_2 P123b_1 P123b_2 P131a_1 P131a_2 P92a_1 P92a_2 
12 
10 
22

Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing


out of 206017 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 20685, 10%
LFC < 0 (down)     : 16920, 8.2%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

12 
F1_1 F1_2 F2_1 F2_2 F5_1 F5_2 P3_1 P3_2 P5_1 P5_2 P6_1 P6_2 
10 
P114b_1 P114b_2 P59a_1 P59a_2 P60a_1 P60a_2 P73a_1 P73a_2 P87a_1 P87a_2 
12 
10 
22

Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing


out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 7721, 3.7%
LFC < 0 (down)     : 4264, 2.1%
outliers [1]       : 0, 0%
low counts [2]     : 31954, 16%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

12 
F1_1 F1_2 F2_1 F2_2 F5_1 F5_2 P3_1 P3_2 P5_1 P5_2 P6_1 P6_2 
10 
HOCM4_1 HOCM4_2 HOCM6_1 HOCM6_2 HOCM9_1 HOCM9_2 HOCM11_1 HOCM11_2 HOCM7_1 HOCM7_2 
12 
10 
22

Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 4 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing


out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 3112, 1.5%
LFC < 0 (down)     : 3829, 1.9%
outliers [1]       : 0, 0%
low counts [2]     : 23966, 12%
(mean count < 4)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

10 
P104a_1 P104a_2 P117b_1 P117b_2 P123b_1 P123b_2 P131a_1 P131a_2 P92a_1 P92a_2 
10 
P114b_1 P114b_2 P59a_1 P59a_2 P60a_1 P60a_2 P73a_1 P73a_2 P87a_1 P87a_2 
10 
10 
20

Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 3 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing


out of 206018 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 7031, 3.4%
LFC < 0 (down)     : 12331, 6%
outliers [1]       : 0, 0%
low counts [2]     : 59914, 29%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

10 
P104a_1 P104a_2 P117b_1 P117b_2 P123b_1 P123b_2 P131a_1 P131a_2 P92a_1 P92a_2 
10 
HOCM4_1 HOCM4_2 HOCM6_1 HOCM6_2 HOCM9_1 HOCM9_2 HOCM11_1 HOCM11_2 HOCM7_1 HOCM7_2 
10 
10 
20

Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing


out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 9286, 4.5%
LFC < 0 (down)     : 15191, 7.4%
outliers [1]       : 0, 0%
low counts [2]     : 39943, 19%
(mean count < 5)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

10 
P114b_1 P114b_2 P59a_1 P59a_2 P60a_1 P60a_2 P73a_1 P73a_2 P87a_1 P87a_2 
10 
HOCM4_1 HOCM4_2 HOCM6_1 HOCM6_2 HOCM9_1 HOCM9_2 HOCM11_1 HOCM11_2 HOCM7_1 HOCM7_2 
10 
10 
20

Warning in DESeqDataSet(se, design = design, ignoreRank): some variables in
design formula are characters, converting to factors

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

-- replacing outliers and refitting for 5 genes
-- DESeq argument 'minReplicatesForReplace' = 7 
-- original counts are preserved in counts(dds)

estimating dispersions

fitting model and testing


out of 206018 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 3799, 1.8%
LFC < 0 (down)     : 7798, 3.8%
outliers [1]       : 0, 0%
low counts [2]     : 79885, 39%
(mean count < 7)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

names(diff_peaks) <- purrr::map_chr(
    seq_len(ncol(samples_combn)), \(x) {
        paste(samples_combn[, x], collapse = "_vs_")
    }
)

FC_THRESHOLD <- 1
PADJ_THRESHOLD <- 0.01

features_selected <- purrr::map(diff_peaks, \(x) {
    x |>
        as.data.frame() |>
        dplyr::filter(
            padj < PADJ_THRESHOLD,
            abs(log2FoldChange) >= FC_THRESHOLD
        ) |>
        tibble::rownames_to_column(var = "feature") |>
        pull(feature)
}) |>
    unlist() |>
    unique()

Loading required package: DESeq2

Loading required package: S4Vectors

Loading required package: stats4

Loading required package: BiocGenerics


Attaching package: 'BiocGenerics'

The following objects are masked from 'package:lubridate':

    intersect, setdiff, union

The following objects are masked from 'package:dplyr':

    combine, intersect, setdiff, union

The following objects are masked from 'package:stats':

    IQR, mad, sd, var, xtabs

The following objects are masked from 'package:base':

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Attaching package: 'S4Vectors'

The following objects are masked from 'package:Matrix':

    expand, unname

The following objects are masked from 'package:lubridate':

    second, second<-

The following objects are masked from 'package:dplyr':

    first, rename

The following object is masked from 'package:tidyr':

    expand

The following object is masked from 'package:utils':

    findMatches

The following objects are masked from 'package:base':

    expand.grid, I, unname

Loading required package: IRanges


Attaching package: 'IRanges'

The following object is masked from 'package:lubridate':

    %within%

The following objects are masked from 'package:dplyr':

    collapse, desc, slice

The following object is masked from 'package:purrr':

    reduce

Loading required package: GenomicRanges

Loading required package: GenomeInfoDb

Loading required package: SummarizedExperiment

Loading required package: MatrixGenerics

Loading required package: matrixStats


Attaching package: 'matrixStats'

The following object is masked from 'package:dplyr':

    count


Attaching package: 'MatrixGenerics'

The following objects are masked from 'package:matrixStats':

    colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
    colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
    colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
    colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
    colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
    colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
    colWeightedMeans, colWeightedMedians, colWeightedSds,
    colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
    rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
    rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
    rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
    rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
    rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
    rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
    rowWeightedSds, rowWeightedVars

Loading required package: Biobase

Welcome to Bioconductor

    Vignettes contain introductory material; view with
    'browseVignettes()'. To cite Bioconductor, see
    'citation("Biobase")', and for packages 'citation("pkgname")'.


Attaching package: 'Biobase'

The following object is masked from 'package:MatrixGenerics':

    rowMedians

The following objects are masked from 'package:matrixStats':

    anyMissing, rowMedians

features_selected |> length()

[1] 5753

# sanity check
stopifnot(
    length(features_selected) == 5753
)

Healthy vs. disease

diff_peaks <- compute_diff_peaks(
    matrix = matrix_readcount_use,
    group_a = samples_fresh_healthy,
    group_b = c(
        samples_fresh_icm,
        samples_fresh_nicm,
        samples_fresh_hcm
    ),
    num_thread = parallel::detectCores() - 1
)

12 
30 
42 

out of 206019 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 14884, 7.2%
LFC < 0 (down)     : 15309, 7.4%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results

FC_THRESHOLD <- 1
PADJ_THRESHOLD <- 0.05

Plot
Data

diff_peaks |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "feature") |>
    dplyr::mutate(
        da = dplyr::case_when(
            (
                (
                    abs(log2FoldChange) >= FC_THRESHOLD
                ) & (
                    padj <= PADJ_THRESHOLD
                )
            ) ~ 1,
            TRUE ~ 0
        ),
        da = as.factor(da)
    ) |>
    dplyr::arrange(da) |>
    dplyr::mutate(
        category = "Healthy vs Disease"
    ) |>
    {
        \(x)
        x |>
            ggplot2::ggplot(
                ggplot2::aes(
                    x = log2(baseMean),
                    y = log2FoldChange,
                    color = da
                )
            ) +
            ggrastr::rasterise(
                ggplot2::geom_point(
                    size = 0.4,
                    alpha = 1,
                    stroke = 0, shape = 16
                ),
                dpi = 900,
                dev = "ragg_png"
            ) +
            ggplot2::geom_density_2d(
                color = "steelblue", size = 0.2
            ) +
            ggplot2::scale_color_manual(
                values = c("grey65", "salmon"), guide = "none"
            ) +
            ggplot2::facet_wrap(
                ggplot2::vars(category),
                nrow = 1,
                strip.position = "top",
            ) +
            ggplot2::labs(
                x = expression(paste("Log"[2], " mean")),
                y = expression(paste("Log"[2], " (fold change)"))
            ) +
            ggplot2::geom_text(
                data = x |>
                    dplyr::count(da),
                ggplot2::aes(label = paste("Num:", n)),
                x = -5,
                y = 5,
                color = "salmon",
                fontface = "bold",
                size = 6 / ggplot2::.pt,
                hjust = 0,
                vjust = 1
            ) +
            ggplot2::theme_light() %+replace%
            ggplot2::theme(
                axis.title = ggplot2::element_text(
                    family = "Arial", size = 7
                ),
                axis.text = ggplot2::element_text(
                    family = "Arial", size = 7
                ),
                panel.background = ggplot2::element_blank(),
                strip.text = ggplot2::element_text(
                    family = "Arial",
                    size = 7,
                    margin = ggplot2::margin(
                        t = 4.4, r = 4.4, b = 4.4, l = 4.4, unit = "pt"
                    )
                )
            )
    }()

diff_peaks |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "feature") |>
    dplyr::mutate(
        da = dplyr::case_when(
            abs(log2FoldChange) >= FC_THRESHOLD & padj <= PADJ_THRESHOLD ~ 1,
            TRUE ~ 0
        ),
        da = as.factor(da)
    ) |>
    dplyr::arrange(da) |>
    dplyr::mutate(
        category = "Healthy vs Disease"
    ) |>
    head(n = 12)

           feature    baseMean log2FoldChange     lfcSE        stat     pvalue
1  1_181358_181567    6.285720    -0.25082657 0.2225674 -1.12696904 0.25975557
2  1_183716_183885    2.917794    -0.61104587 0.3185859 -1.91799391 0.05511178
3  1_183999_184321    5.587581    -0.40899805 0.2454201 -1.66652230 0.09560943
4  1_191239_191880   16.735046    -0.19449895 0.1594596 -1.21973776 0.22256430
5  1_267894_268128    6.533567     0.05967949 0.2321327  0.25709207 0.79710770
6  1_629834_630082  704.298690     0.03921131 0.1246965  0.31445402 0.75317623
7  1_633881_634177 1670.223559    -0.10947573 0.1252210 -0.87426032 0.38197650
8  1_778297_779354   78.566315     0.01854268 0.1195157  0.15514849 0.87670426
9  1_794936_795259    7.667010    -0.44843135 0.2054008 -2.18320186 0.02902095
10 1_816789_817532   15.907760    -0.26947734 0.1423673 -1.89283152 0.05838028
11 1_818925_819284    7.305783     0.20719291 0.2135904  0.97004803 0.33202255
12 1_819939_820383    8.225880     0.01173971 0.2131140  0.05508655 0.95606951
        padj da           category
1  0.5057420  0 Healthy vs Disease
2  0.2128599  0 Healthy vs Disease
3  0.2896372  0 Healthy vs Disease
4  0.4649598  0 Healthy vs Disease
5  0.9045374  0 Healthy vs Disease
6  0.8802030  0 Healthy vs Disease
7  0.6229343  0 Healthy vs Disease
8  0.9438932  0 Healthy vs Disease
9  0.1480275  0 Healthy vs Disease
10 0.2197918  0 Healthy vs Disease
11 0.5774971  0 Healthy vs Disease
12 0.9813831  0 Healthy vs Disease

Heatmap

RASTERISED <- FALSE

matrix_cpm_use <- t(
    t(matrix_readcount_use) / colSums(matrix_readcount_use)
) * 1e+06

matrix_heatmap <- matrix_cpm_use[features_selected, unlist(samples)]
matrix_heatmap <- matrix_heatmap[rowSums(matrix_heatmap) != 0, ]

matrix_heatmap <- log10(matrix_heatmap + 1)
matrix_heatmap <- t(scale(t(matrix_heatmap)))

heatmap_limits <- quantile(matrix_heatmap, c(0.05, 0.95))
matrix_heatmap[matrix_heatmap < heatmap_limits[1]] <- heatmap_limits[1]
matrix_heatmap[matrix_heatmap > heatmap_limits[2]] <- heatmap_limits[2]

# hierarchical clustering
hclust_out_features <- hclust(
    dist(matrix_heatmap, method = "euclidean"),
    method = "complete"
)

NUM_CENTERS <- 4
hclust_out_features_df <- cutree(
    tree = hclust_out_features, k = NUM_CENTERS
) |>
    tibble::as_tibble(rownames = "feature") |>
    dplyr::mutate(order = hclust_out_features$order) |>
    dplyr::rename(hclust_group = value)

hclust_out_features_df <- purrr::map2(
    c(2, 1, 4, 3), seq_len(NUM_CENTERS), \(x, y) {
        hclust_out_features_df |>
            dplyr::filter(hclust_group == x) |>
            dplyr::arrange(order) |>
            dplyr::mutate(group = y)
    }
) |>
    dplyr::bind_rows()

# heatmap column annotation
ha_group <- colnames(matrix_heatmap) |>
    tibble::enframe(value = "sample") |>
    dplyr::left_join(
        purrr::map(names(samples), \(x) {
            data.frame(
                sample = samples[[x]],
                group = x
            )
        }) |>
            dplyr::bind_rows(),
        by = "sample"
    ) |>
    dplyr::pull(group) |>
    stringr::str_to_title()

ha_column <- ComplexHeatmap::HeatmapAnnotation(
    group = ComplexHeatmap::anno_simple(
        ha_group,
        col = setNames(
            object = as.character(yarrr::piratepal(palette = "google")),
            nm = names(samples) |>
                stringr::str_to_title()
        ),
        which = "column",
        pt_size = grid::unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = grid::unit(1.5, "mm")
    ),
    #
    show_annotation_name = FALSE,
    annotation_label = c(
        "Group"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)

# heatmap row annotation
ha_region <- hclust_out_features_df |>
    pull(group) |>
    {
        \(x) paste("Signature", x)
    }()

ha_left <- ComplexHeatmap::HeatmapAnnotation(
    region = ComplexHeatmap::anno_simple(
        ha_region,
        col = setNames(
            object = as.character(yarrr::piratepal(palette = "google")),
            nm = sort(unique(ha_region))
        ),
        which = "row",
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = grid::unit(1.5, "mm")
    ),
    which = "row",
    show_annotation_name = FALSE,
    annotation_label = c(
        "Region"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)

sample_ids <- tibble::tribble(
    ~sample_id, ~sample_id_publication,
    "F1_1", "C1_1",
    "F1_2", "C1_2",
    "F2_1", "C2_1",
    "F2_2", "C2_2",
    "F5_1", "C3_1",
    "F5_2", "C3_2",
    "P3_1", "C4_1",
    "P3_2", "C4_2",
    "P5_1", "C5_1",
    "P5_2", "C5_2",
    "P6_1", "C6_1",
    "P6_2", "C6_2",
    "P104a_1", "I1_1",
    "P104a_2", "I1_2",
    "P117b_1", "I2_1",
    "P117b_2", "I2_2",
    "P123b_1", "I3_1",
    "P123b_2", "I3_2",
    "P131a_1", "I4_1",
    "P131a_2", "I4_2",
    "P92a_1", "I5_1",
    "P92a_2", "I5_2",
    "P114b_1", "NI1_1",
    "P114b_2", "NI1_2",
    "P59a_1", "NI2_1",
    "P59a_2", "NI2_2",
    "P60a_1", "NI3_1",
    "P60a_2", "NI3_2",
    "P73a_1", "NI4_1",
    "P73a_2", "NI4_2",
    "P87a_1", "NI5_1",
    "P87a_2", "NI5_2",
    "P75a_1", "U1_1",
    "P75a_2", "U1_2",
    "P115b_1", "U2_1",
    "P115b_2", "U2_2",
    "P141a_1", "U3_1",
    "P141a_2", "U3_2",
    "HOCM4_1", "H1_1",
    "HOCM4_2", "H1_2",
    "HOCM6_1", "H2_1",
    "HOCM6_2", "H2_2",
    "HOCM9_1", "H3_1",
    "HOCM9_2", "H3_2",
    "HOCM11_1", "H4_1",
    "HOCM11_2", "H4_2",
    "HOCM7_1", "H5_1",
    "HOCM7_2", "H5_2",
    "MYEC4_2", "U6_2",
    "P108b_1", "U4_1",
    "P108b_2", "U4_2"
)

sample_ids <- setNames(
    object = sample_ids$sample_id_publication,
    nm = sample_ids$sample_id
)

# heatmap
ht <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap |> as.matrix(),
    rect_gp = grid::gpar(col = NA, lwd = 0),
    col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    row_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = 0,
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = FALSE,
    show_column_names = TRUE,
    column_labels = sample_ids[colnames(matrix_heatmap)],
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    top_annotation = ha_column,
    bottom_annotation = NULL,
    left_annotation = ha_left,
    #
    column_split = ha_group |>
        tibble::enframe() |>
        dplyr::mutate(
            value = stringr::str_remove(
                string = value, pattern = "Fresh_"
            ),
            value = case_when(
                value == "healthy" ~ "Healthy",
                TRUE ~ stringr::str_to_upper(value)
            )
        ) |>
        dplyr::pull(value) |>
        factor(
            levels = c("Healthy", "ICM", "NICM", "HCM")
        ),
    column_gap = grid::unit(0, "mm"),
    #
    show_heatmap_legend = TRUE,
    heatmap_legend_param = list(
        title = "Z score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = grid::unit(12, "mm"),
        legend_width = grid::unit(4, "mm")
    ),
    #
    use_raster = RASTERISED
)

# legend
lgd_group <- ComplexHeatmap::Legend(
    title = "Group",
    labels = ha_group |> unique(),
    legend_gp = grid::gpar(
        fill = setNames(
            object = as.character(yarrr::piratepal(palette = "google")),
            nm = names(samples) |>
                stringr::str_remove(pattern = "fresh_") |>
                stringr::str_to_title()
        )
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    ),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    )
)

pd <- ComplexHeatmap::packLegend(
    lgd_group,
    direction = "vertical"
)

# draw
ComplexHeatmap::draw(
    ht,
    heatmap_legend_list = list(pd)
)

Code

enriched_go |>
    dplyr::mutate(
        category = factor(
            category,
            levels = c(
                "fresh_healthy",
                "fresh_icm",
                "fresh_nicm",
                "fresh_hcm"
            )
        ),
        rank = factor(rank, levels = 5:1)
    ) |>
    ggplot2::ggplot(
        ggplot2::aes(
            x = p_value_log,
            y = rank,
            fill = category,
        )
    ) +
    ggplot2::geom_bar(stat = "identity") +
    ggplot2::facet_wrap(
        ggplot2::vars(category),
        ncol = 1,
        strip.position = "left",
        scales = "free_x",
        labeller = ggplot2::labeller(
            category = c(
                "fresh_healthy" = "Healthy",
                "fresh_icm" = "ICM",
                "fresh_nicm" = "NICM",
                "fresh_hcm" = "HCM"
            )
        )
    ) +
    ggplot2::geom_text(
        ggplot2::aes(
            x = 0,
            label = term,
            group = NULL
        ),
        size = 6 / ggplot2::.pt,
        family = "Arial",
        color = "black",
        data = enriched_go |>
            dplyr::mutate(
                term = stringr::str_replace(
                    string = term,
                    pattern = "organization",
                    replacement = "org."
                ),
                category = factor(
                    category,
                    levels = c(
                        "fresh_healthy",
                        "fresh_icm",
                        "fresh_nicm",
                        "fresh_hcm"
                    )
                ),
                rank = factor(rank, levels = 5:1),
                term = paste(" ", term)
            ),
        hjust = 0
    ) +
    ggplot2::scale_x_continuous(
        name = expression(paste("-log"[10], " p")),
        labels = scales::math_format(10^.x)
    ) +
    ggplot2::scale_y_discrete(name = NULL) +
    ggplot2::scale_fill_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    ggplot2::guides(fill = "none") +
    ggplot2::theme_light() %+replace%
    ggplot2::theme(
        axis.title = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_text(family = "Arial", size = 7),
        axis.text.y = ggplot2::element_blank(),
        axis.ticks.y = ggplot2::element_blank(),
        panel.background = ggplot2::element_blank(),
        strip.text = ggplot2::element_text(
            family = "Arial", size = 7,
            margin = ggplot2::margin(
                t = 4.4, r = 4.4, b = 4.4, l = 4.4, unit = "pt"
            )
        )
    )

enriched_go |> knitr::kable()

category	rank	go_id	term	p_value	p_value_log
fresh_healthy	1	GO:0009653	anatomical structure morphogenesis	0.0e+00	13.552842
fresh_healthy	2	GO:0048468	cell development	0.0e+00	12.721246
fresh_healthy	3	GO:0006928	movement of cell or subcellular component	0.0e+00	11.657577
fresh_healthy	4	GO:0023052	signaling	0.0e+00	10.886057
fresh_icm	1	GO:0030029	actin filament-based process	0.0e+00	22.136677
fresh_icm	2	GO:0003012	muscle system process	0.0e+00	21.585027
fresh_icm	3	GO:0006936	muscle contraction	0.0e+00	18.397940
fresh_icm	4	GO:0061061	muscle structure development	0.0e+00	18.251812
fresh_nicm	1	GO:0007155	cell adhesion	0.0e+00	10.657577
fresh_nicm	2	GO:0007275	multicellular organism development	0.0e+00	9.602060
fresh_nicm	3	GO:0016477	cell migration	0.0e+00	9.229148
fresh_nicm	4	GO:0120036	plasma membrane bounded cell projection organization	1.0e-07	6.853872
fresh_hcm	1	GO:0034332	adherens junction organization	5.3e-05	4.275724
fresh_hcm	2	GO:0072073	kidney epithelium development	2.4e-04	3.619789
fresh_hcm	3	GO:0034330	cell junction organization	3.0e-04	3.522879
fresh_hcm	4	GO:0032502	developmental process	3.4e-04	3.468521

Embedding

SEED <- 20220408

color_palette_group <- setNames(
    object = as.character(yarrr::piratepal(palette = "google")),
    nm = c("Healthy", "ICM", "NICM", "HCM")
)

matrix_cpm_use <- t(
    t(matrix_readcount_use) / colSums(matrix_readcount_use)
) * 1e+06

embedding <- purrr::map(
    list(rownames(matrix_cpm_use), features_selected), \(x) {
        matrix_umap <- matrix_cpm_use[x, unlist(samples)]
        matrix_umap <- matrix_umap[rowSums(matrix_umap) >= 30, ]
        matrix_umap <- log1p(matrix_umap)
        matrix_umap <- t(
            scale(t(matrix_umap), center = TRUE, scale = TRUE)
        )

        set.seed(seed = SEED)
        embedding_umap <- uwot::umap(
            X = t(matrix_umap),
            n_neighbors = 10,
            n_components = 2,
            metric = "euclidean",
            spread = 1,
            min_dist = 0.01,
            n_threads = 1,
            verbose = TRUE
        )
    }
) |>
    purrr::reduce(cbind) |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample") |>
    dplyr::rename(
        "x_umap1" = "V1",
        "y_umap1" = "V2",
        "x_umap2" = "V3",
        "y_umap2" = "V4"
    ) |>
    dplyr::mutate(
        category = case_when(
            sample %in% samples$fresh_healthy ~ "Healthy",
            sample %in% samples$fresh_icm ~ "ICM",
            sample %in% samples$fresh_nicm ~ "NICM",
            sample %in% samples$fresh_hcm ~ "HCM"
        ),
        category = factor(
            category,
            levels = c("Healthy", "ICM", "NICM", "HCM")
        )
    )

07:24:11 UMAP embedding parameters a = 1.896 b = 0.8006

07:24:11 Read 42 rows and found 205154 numeric columns

07:24:11 Using FNN for neighbor search, n_neighbors = 10

07:24:12 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 10
07:24:13 Initializing from normalized Laplacian + noise (using irlba)
07:24:13 Commencing optimization for 500 epochs, with 552 positive edges
07:24:13 Optimization finished
07:24:13 UMAP embedding parameters a = 1.896 b = 0.8006
07:24:13 Read 42 rows and found 5751 numeric columns
07:24:13 Using FNN for neighbor search, n_neighbors = 10
07:24:14 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 10
07:24:14 Initializing from normalized Laplacian + noise (using irlba)
07:24:14 Commencing optimization for 500 epochs, with 416 positive edges
07:24:14 Optimization finished

GEOM_POINT_SIZE <- 2
EMBEDDING_TITLE_PREFIX <- "UMAP"

purrr::map2(
    list("1", "2"),
    list(nrow(matrix_cpm_use), length(features_selected)), \(x, y) {
        plot_embedding(
            data = embedding[, paste0(c("x_umap", "y_umap"), x)],
            color = embedding$category |> as.factor(),
            label = glue::glue("{EMBEDDING_TITLE_PREFIX}; {y}"),
            label_position = NULL,
            show_color_value_labels = FALSE,
            show_color_legend = FALSE,
            geom_point_size = GEOM_POINT_SIZE,
            sort_values = FALSE,
            rasterise = FALSE
        ) +
            scale_color_manual(
                values = color_palette_group
            ) +
            theme_customized_embedding()
    }
) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 2) +
    patchwork::plot_annotation(
        theme = ggplot2::theme(plot.margin = ggplot2::margin())
    )

Prediction

N_REPLICATES <- 100
N_SAMPLES_TEST <- 2

AUC; multiple

76 M

matrix_rf_core_set <- matrix_cpm_use[
    rownames(matrix_cpm_use) %in% features_selected,
    unlist(samples)
] |>
    t() |>
    as.matrix() |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample") |>
    dplyr::mutate(
        group = case_when(
            sample %in% samples_fresh_healthy ~ "healthy",
            sample %in% samples_fresh_icm ~ "icm",
            sample %in% samples_fresh_nicm ~ "nicm",
            sample %in% samples_fresh_hcm ~ "hcm"
        ),
        group = factor(
            group,
            levels = c("healthy", "icm", "nicm", "hcm")
        )
    )

rownames(matrix_rf_core_set) <- matrix_rf_core_set$sample
matrix_rf_core_set$sample <- NULL
colnames(matrix_rf_core_set) <- make.names(
    colnames(matrix_rf_core_set)
)

# sanity check
stopifnot(
    class(matrix_rf_core_set) == "data.frame"
)

roc_data <- purrr::map(seq_len(N_REPLICATES), \(x) {
    # roc_data <- furrr::future_map(seq_len(N_REPLICATES), \(x) {
    samples_test <- purrr::map(samples, \(x) {
        sample(
            stringr::str_remove(
                string = x,
                pattern = "_[1|2]"
            ) |> unique(),
            N_SAMPLES_TEST
        )
    })

    samples_test <- unlist(samples)[unlist(samples) |>
        stringr::str_remove(
            pattern = "_[1|2]"
        ) %in% unlist(samples_test)]

    matrix_rf_core_set_test <- matrix_rf_core_set[samples_test, ]
    matrix_rf_core_set_train <- matrix_rf_core_set[
        !rownames(matrix_rf_core_set) %in% samples_test,
    ]

    rf_model <- randomForest::randomForest(
        group ~ .,
        data = matrix_rf_core_set_train[
            , !colnames(matrix_rf_core_set_train) %in% c("sample"),
        ],
        ntree = 1000
    )
    rf_prediction <- predict(
        rf_model,
        matrix_rf_core_set_test,
        type = "prob"
    )

    roc_rf <- pROC::multiclass.roc(
        matrix_rf_core_set_test$group,
        rf_prediction[, 2]
    )

    cat(
        nrow(matrix_rf_core_set_test),
        nrow(matrix_rf_core_set_train),
        pROC::auc(roc_rf),
        "\n"
    )

    return(roc_rf)
})
names(roc_data) <- seq_len(N_REPLICATES)

roc_data <- format_roc(roc_data = roc_data)
roc_averaged <- average_roc(roc_data = roc_data)

20 M

library_size <- tibble::tribble(
    ~sample, ~num_reads,
    "F1_1", 60964390,
    "F1_2", 57968864,
    "F2_1", 54158970,
    "F2_2", 143929746,
    "F5_1", 55528072,
    "F5_2", 64044782,
    "P3_1", 78506662,
    "P3_2", 80161416,
    "P5_1", 100592008,
    "P5_2", 99926658,
    "P6_1", 74427936,
    "P6_2", 74223738,
    "P104a_1", 61766652,
    "P104a_2", 71350414,
    "P117b_1", 66442460,
    "P117b_2", 76837402,
    "P123b_1", 80766906,
    "P123b_2", 82552378,
    "P131a_1", 88475442,
    "P131a_2", 68512592,
    "P92a_1", 99683410,
    "P92a_2", 74318972,
    "P114b_1", 69490016,
    "P114b_2", 74175524,
    "P59a_1", 72517888,
    "P59a_2", 75524788,
    "P60a_1", 75844218,
    "P60a_2", 77979036,
    "P73a_1", 109707158,
    "P73a_2", 81454894,
    "P87a_1", 68502630,
    "P87a_2", 52512570,
    "HOCM4_1", 63494724,
    "HOCM4_2", 64409436,
    "HOCM6_1", 64873814,
    "HOCM6_2", 80750200,
    "HOCM9_1", 81728322,
    "HOCM9_2", 74958418,
    "HOCM11_1", 65222662,
    "HOCM11_2", 77667354,
    "HOCM7_1", 69229542,
    "HOCM7_2", 80768964
)

library_size <- setNames(
    object = library_size$num_reads,
    nm = library_size$sample
)

TARGET_NUM_READS <- 20000000

roc_data <- purrr::map(seq_len(N_REPLICATES), \(x) {
    # roc_data <- furrr::future_map(seq_len(N_REPLICATES), \(x) {
    samples_test <- purrr::map(samples, \(x) {
        sample(
            stringr::str_remove(
                string = x,
                pattern = "_[1|2]"
            ) |> unique(),
            N_SAMPLES_TEST
        )
    })

    samples_test <- unlist(samples)[unlist(samples) |>
        stringr::str_remove(
            pattern = "_[1|2]"
        ) %in% unlist(samples_test)]

    matrix_rf_core_set_train <- matrix_rf_core_set[
        !rownames(matrix_rf_core_set) %in% samples_test,
    ]

    matrix_cpm_test <- downsample_matrix(
        matrix = matrix_readcount_use[, samples_test],
        proportion = TARGET_NUM_READS / library_size[samples_test],
        seed = SEED
    ) |>
        as.matrix() |>
        calc_cpm()

    matrix_rf_core_set_test <- matrix_cpm_test[
        rownames(matrix_cpm_test) %in% features_selected,
        unlist(samples_test)
    ] |>
        t() |>
        as.matrix() |>
        as.data.frame() |>
        tibble::rownames_to_column(var = "sample") |>
        dplyr::mutate(
            group = case_when(
                sample %in% samples_fresh_healthy ~ "healthy",
                sample %in% samples_fresh_icm ~ "icm",
                sample %in% samples_fresh_nicm ~ "nicm",
                sample %in% samples_fresh_hcm ~ "hcm"
            ),
            group = factor(
                group,
                levels = c("healthy", "icm", "nicm", "hcm")
            )
        )

    rownames(matrix_rf_core_set_test) <- matrix_rf_core_set_test$sample
    matrix_rf_core_set_test$sample <- NULL
    colnames(matrix_rf_core_set_test) <- make.names(
        colnames(matrix_rf_core_set_test)
    )

    class(matrix_rf_core_set_test)
    matrix_rf_core_set_test[1:5, 1:5]

    rf_model <- randomForest::randomForest(
        group ~ .,
        data = matrix_rf_core_set_train[
            ,
            !colnames(matrix_rf_core_set_train) %in% c("sample"),
        ],
        ntree = 1000
    )
    rf_prediction <- predict(
        rf_model,
        matrix_rf_core_set_test,
        type = "prob"
    )

    roc_rf <- pROC::multiclass.roc(
        matrix_rf_core_set_test$group, rf_prediction[, 2]
    )

    cat(
        nrow(matrix_rf_core_set_test),
        nrow(matrix_rf_core_set_train),
        pROC::auc(roc_rf),
        "\n"
    )

    return(roc_rf)
})
names(roc_data) <- seq_len(N_REPLICATES)

roc_data <- format_roc(roc_data = roc_data)
roc_averaged_downsampled <- average_roc(roc_data = roc_data)

plot_roc_curve_two(
    data = list(roc_averaged, roc_averaged_downsampled),
    label = c("76 M", "20 M")
)

Construct model

Build

matrix_rf_train <- matrix_cpm_use[
    rownames(matrix_cpm_use) %in% features_selected,
    c(
        samples_fresh_healthy,
        samples_fresh_icm,
        samples_fresh_nicm,
        samples_fresh_hcm
    )
]

matrix_rf_train <- matrix_rf_train |>
    t() |>
    as.matrix() |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample") |>
    dplyr::mutate(
        group = case_when(
            sample %in% samples_fresh_healthy ~ "healthy",
            sample %in% samples_fresh_icm ~ "icm",
            sample %in% samples_fresh_nicm ~ "nicm",
            sample %in% samples_fresh_hcm ~ "hcm"
        ),
        group = factor(
            group,
            levels = c("healthy", "icm", "nicm", "hcm")
        )
    )

rownames(matrix_rf_train) <- matrix_rf_train$sample
matrix_rf_train$sample <- NULL
colnames(matrix_rf_train) <- make.names(colnames(matrix_rf_train))

# sanity check
stopifnot(
    class(matrix_rf_train) == "data.frame"
)

set.seed(seed = SEED)
rf_model <- randomForest::randomForest(
    group ~ .,
    data = matrix_rf_train[
        ,
        !colnames(matrix_rf_train) %in% c("sample"),
    ],
    ntree = 1000
)

Predict

samples_selected <- tibble::tribble(
    ~sample, ~ratio,
    "P97a_1", 0.3677451197,
    "P97a_2", 0.396041047,
    "P141a_1", 0.208162124,
    "P141a_2", 0.312910588,
    "MYEC4_2", 0.320481892,
    "P108b_1", 0.239929499,
    "P108b_2", 0.321824834
)

matrix_readcount_p1 <- matrix_readcount_use[
    ,
    c("F1_1", "P92a_1", "P59a_1", "HOCM4_1")
]

matrix_readcount_p2 <- purrr::map(
    seq_len(nrow(samples_selected)), \(x) {
        downsample_matrix(
            matrix_readcount_use[
                , samples_selected[x, "sample", drop = TRUE],
                drop = FALSE
            ],
            proportion = c(samples_selected[x, "ratio", drop = TRUE])
        )
    }
) |>
    purrr::reduce(cbind)

matrix_rf_test <- calc_cpm(
    cbind(
        matrix_readcount_p1,
        matrix_readcount_p2
    )
)[
    rownames(matrix_readcount_use) %in% features_selected,
]

matrix_rf_test <- matrix_rf_test |>
    t() |>
    as.matrix() |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample")

rownames(matrix_rf_test) <- matrix_rf_test$sample
matrix_rf_test$sample <- NULL
colnames(matrix_rf_test) <- make.names(colnames(matrix_rf_test))

rf_prediction <- predict(rf_model, matrix_rf_test, type = "prob")

Visualize

Heatmap

# horizontal
ht <- ComplexHeatmap::Heatmap(
    matrix = matrix_heatmap |> as.matrix() |> t(),
    col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    #
    cluster_rows = FALSE,
    show_row_dend = FALSE,
    cluster_columns = FALSE,
    show_column_dend = FALSE,
    #
    show_row_names = TRUE,
    row_labels = colnames(matrix_heatmap),
    row_names_side = c("left"),
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    #
    show_column_names = TRUE,
    column_names_side = c("bottom"),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_names_rot = 90,
    #
    show_heatmap_legend = TRUE,
    heatmap_legend_param = list(
        title = "Score",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = grid::unit(12.5, "mm"),
        legend_width = grid::unit(1, "mm")
    ),
)

# draw
ComplexHeatmap::draw(ht)

Radar plot

purrr::map(rownames(matrix_heatmap), \(x) {
    data <- matrix_heatmap[
        x, c("HCM     ", "Healthy", "     ICM", "NICM"),
        drop = FALSE
    ]
    data <- data * 100

    plot_radar(data)
}) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 3)

R session info

devtools::session_info()

─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.3.1 (2023-06-16)
 os       macOS Ventura 13.5
 system   aarch64, darwin22.4.0
 ui       unknown
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Chicago
 date     2023-08-12
 pandoc   2.19.2 @ /Users/jialei/.pyenv/shims/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package              * version     date (UTC) lib source
 abind                  1.4-5       2016-07-21 [1] CRAN (R 4.3.0)
 BayesFactor            0.9.12-4.4  2022-07-05 [1] CRAN (R 4.3.0)
 beeswarm               0.4.0       2021-06-01 [1] CRAN (R 4.3.0)
 Biobase              * 2.60.0      2023-04-25 [1] Bioconductor
 BiocGenerics         * 0.46.0      2023-04-25 [1] Bioconductor
 BiocParallel           1.34.2      2023-05-22 [1] Bioconductor
 bitops                 1.0-7       2021-04-24 [1] CRAN (R 4.3.0)
 cachem                 1.0.8       2023-05-01 [1] CRAN (R 4.3.0)
 callr                  3.7.3       2022-11-02 [1] CRAN (R 4.3.0)
 circlize               0.4.15      2022-05-10 [1] CRAN (R 4.3.0)
 cli                    3.6.1       2023-03-23 [1] CRAN (R 4.3.0)
 clue                   0.3-64      2023-01-31 [1] CRAN (R 4.3.0)
 cluster                2.1.4       2022-08-22 [2] CRAN (R 4.3.1)
 coda                   0.19-4      2020-09-30 [1] CRAN (R 4.3.0)
 codetools              0.2-19      2023-02-01 [2] CRAN (R 4.3.1)
 colorspace             2.1-0       2023-01-23 [1] CRAN (R 4.3.0)
 ComplexHeatmap         2.16.0      2023-04-25 [1] Bioconductor
 crayon                 1.5.2       2022-09-29 [1] CRAN (R 4.3.0)
 DelayedArray           0.26.7      2023-07-28 [1] Bioconductor
 DESeq2               * 1.40.2      2023-06-23 [1] Bioconductor
 devtools               2.4.5.9000  2023-08-11 [1] Github (r-lib/devtools@163c3f2)
 digest                 0.6.33      2023-07-07 [1] CRAN (R 4.3.1)
 doParallel             1.0.17      2022-02-07 [1] CRAN (R 4.3.0)
 dplyr                * 1.1.2.9000  2023-07-19 [1] Github (tidyverse/dplyr@c963d4d)
 ellipsis               0.3.2       2021-04-29 [1] CRAN (R 4.3.0)
 evaluate               0.21        2023-05-05 [1] CRAN (R 4.3.0)
 extrafont            * 0.19        2023-01-18 [1] CRAN (R 4.3.0)
 extrafontdb            1.0         2012-06-11 [1] CRAN (R 4.3.0)
 fansi                  1.0.4       2023-01-22 [1] CRAN (R 4.3.0)
 farver                 2.1.1       2022-07-06 [1] CRAN (R 4.3.0)
 fastmap                1.1.1       2023-02-24 [1] CRAN (R 4.3.0)
 FNN                    1.1.3.2     2023-03-20 [1] CRAN (R 4.3.0)
 forcats              * 1.0.0.9000  2023-04-23 [1] Github (tidyverse/forcats@4a8525a)
 foreach                1.5.2       2022-02-02 [1] CRAN (R 4.3.0)
 fs                     1.6.3       2023-07-20 [1] CRAN (R 4.3.1)
 generics               0.1.3       2022-07-05 [1] CRAN (R 4.3.0)
 GenomeInfoDb         * 1.36.1      2023-06-21 [1] Bioconductor
 GenomeInfoDbData       1.2.10      2023-04-23 [1] Bioconductor
 GenomicRanges        * 1.52.0      2023-04-25 [1] Bioconductor
 GetoptLong             1.0.5       2020-12-15 [1] CRAN (R 4.3.0)
 ggbeeswarm             0.7.2       2023-04-29 [1] CRAN (R 4.3.0)
 ggplot2              * 3.4.2.9000  2023-08-11 [1] Github (tidyverse/ggplot2@2cd0e96)
 ggrastr                1.0.2       2023-06-01 [1] CRAN (R 4.3.0)
 GlobalOptions          0.1.2       2020-06-10 [1] CRAN (R 4.3.0)
 glue                   1.6.2.9000  2023-04-23 [1] Github (tidyverse/glue@cbac82a)
 gtable                 0.3.3.9000  2023-04-23 [1] Github (r-lib/gtable@c56fd4f)
 hms                    1.1.3       2023-03-21 [1] CRAN (R 4.3.0)
 htmltools              0.5.6       2023-08-10 [1] CRAN (R 4.3.1)
 htmlwidgets            1.6.2       2023-03-17 [1] CRAN (R 4.3.0)
 IRanges              * 2.34.1      2023-06-22 [1] Bioconductor
 irlba                  2.3.5.1     2022-10-03 [1] CRAN (R 4.3.0)
 isoband                0.2.7       2022-12-20 [1] CRAN (R 4.3.0)
 iterators              1.0.14      2022-02-05 [1] CRAN (R 4.3.0)
 jpeg                   0.1-10      2022-11-29 [1] CRAN (R 4.3.0)
 jsonlite               1.8.7       2023-06-29 [1] CRAN (R 4.3.1)
 knitr                  1.43        2023-05-25 [1] CRAN (R 4.3.0)
 labeling               0.4.2       2020-10-20 [1] CRAN (R 4.3.0)
 lattice                0.21-8      2023-04-05 [2] CRAN (R 4.3.1)
 lifecycle              1.0.3       2022-10-07 [1] CRAN (R 4.3.0)
 locfit                 1.5-9.8     2023-06-11 [1] CRAN (R 4.3.0)
 lubridate            * 1.9.2.9000  2023-07-22 [1] Github (tidyverse/lubridate@cae67ea)
 magick                 2.7.5       2023-08-07 [1] CRAN (R 4.3.1)
 magrittr               2.0.3       2022-03-30 [1] CRAN (R 4.3.0)
 MASS                   7.3-60      2023-05-04 [2] CRAN (R 4.3.1)
 Matrix               * 1.6-0       2023-07-08 [2] CRAN (R 4.3.1)
 MatrixGenerics       * 1.12.3      2023-07-30 [1] Bioconductor
 MatrixModels           0.5-2       2023-07-10 [1] CRAN (R 4.3.1)
 matrixStats          * 1.0.0       2023-06-02 [1] CRAN (R 4.3.0)
 memoise                2.0.1       2021-11-26 [1] CRAN (R 4.3.0)
 munsell                0.5.0       2018-06-12 [1] CRAN (R 4.3.0)
 mvtnorm                1.2-2       2023-06-08 [1] CRAN (R 4.3.0)
 patchwork            * 1.1.2.9000  2023-08-11 [1] Github (thomasp85/patchwork@bd57553)
 pbapply                1.7-2       2023-06-27 [1] CRAN (R 4.3.1)
 pillar                 1.9.0       2023-03-22 [1] CRAN (R 4.3.0)
 pkgbuild               1.4.2       2023-06-26 [1] CRAN (R 4.3.1)
 pkgconfig              2.0.3       2019-09-22 [1] CRAN (R 4.3.0)
 pkgload                1.3.2.9000  2023-07-05 [1] Github (r-lib/pkgload@3cf9896)
 plyr                   1.8.8       2022-11-11 [1] CRAN (R 4.3.0)
 png                    0.1-8       2022-11-29 [1] CRAN (R 4.3.0)
 prettyunits            1.1.1.9000  2023-04-23 [1] Github (r-lib/prettyunits@8706d89)
 pROC                   1.18.4      2023-07-06 [1] CRAN (R 4.3.1)
 processx               3.8.2       2023-06-30 [1] CRAN (R 4.3.1)
 ps                     1.7.5       2023-04-18 [1] CRAN (R 4.3.0)
 purrr                * 1.0.2.9000  2023-08-11 [1] Github (tidyverse/purrr@ac4f5a9)
 R.cache                0.16.0      2022-07-21 [1] CRAN (R 4.3.0)
 R.methodsS3            1.8.2       2022-06-13 [1] CRAN (R 4.3.0)
 R.oo                   1.25.0      2022-06-12 [1] CRAN (R 4.3.0)
 R.utils                2.12.2      2022-11-11 [1] CRAN (R 4.3.0)
 R6                     2.5.1.9000  2023-04-23 [1] Github (r-lib/R6@e97cca7)
 ragg                   1.2.5       2023-01-12 [1] CRAN (R 4.3.0)
 randomForest           4.7-1.1     2022-05-23 [1] CRAN (R 4.3.1)
 RColorBrewer           1.1-3       2022-04-03 [1] CRAN (R 4.3.0)
 Rcpp                   1.0.11      2023-07-06 [1] CRAN (R 4.3.1)
 RCurl                  1.98-1.12   2023-03-27 [1] CRAN (R 4.3.0)
 readr                * 2.1.4.9000  2023-08-03 [1] Github (tidyverse/readr@80e4dc1)
 remotes                2.4.2.9000  2023-06-09 [1] Github (r-lib/remotes@8875171)
 reticulate             1.31        2023-08-10 [1] CRAN (R 4.3.1)
 rjson                  0.2.21      2022-01-09 [1] CRAN (R 4.3.0)
 rlang                  1.1.1.9000  2023-06-09 [1] Github (r-lib/rlang@c55f602)
 rmarkdown              2.23.4      2023-07-27 [1] Github (rstudio/rmarkdown@054d735)
 rstudioapi             0.15.0.9000 2023-07-19 [1] Github (rstudio/rstudioapi@feceaef)
 Rttf2pt1               1.3.12      2023-01-22 [1] CRAN (R 4.3.0)
 S4Arrays               1.0.5       2023-07-24 [1] Bioconductor
 S4Vectors            * 0.38.1      2023-05-02 [1] Bioconductor
 scales                 1.2.1       2022-08-20 [1] CRAN (R 4.3.0)
 sessioninfo            1.2.2       2021-12-06 [1] CRAN (R 4.3.0)
 shape                  1.4.6       2021-05-19 [1] CRAN (R 4.3.0)
 stringi                1.7.12      2023-01-11 [1] CRAN (R 4.3.0)
 stringr              * 1.5.0.9000  2023-08-11 [1] Github (tidyverse/stringr@08ff36f)
 styler               * 1.10.1      2023-07-17 [1] Github (r-lib/styler@aca7223)
 SummarizedExperiment * 1.30.2      2023-06-06 [1] Bioconductor
 systemfonts            1.0.4       2022-02-11 [1] CRAN (R 4.3.0)
 textshaping            0.3.6       2021-10-13 [1] CRAN (R 4.3.0)
 tibble               * 3.2.1.9005  2023-05-28 [1] Github (tidyverse/tibble@4de5c15)
 tidyr                * 1.3.0.9000  2023-04-23 [1] Github (tidyverse/tidyr@0764e65)
 tidyselect             1.2.0       2022-10-10 [1] CRAN (R 4.3.0)
 tidyverse            * 2.0.0.9000  2023-04-23 [1] Github (tidyverse/tidyverse@8ec2e1f)
 timechange             0.2.0       2023-01-11 [1] CRAN (R 4.3.0)
 tzdb                   0.4.0       2023-05-12 [1] CRAN (R 4.3.0)
 usethis                2.2.2.9000  2023-07-11 [1] Github (r-lib/usethis@467ff57)
 utf8                   1.2.3       2023-01-31 [1] CRAN (R 4.3.0)
 uwot                   0.1.16.9000 2023-06-30 [1] Github (jlmelville/uwot@cef28ef)
 vctrs                  0.6.3       2023-06-14 [1] CRAN (R 4.3.0)
 vipor                  0.4.5       2017-03-22 [1] CRAN (R 4.3.0)
 wesanderson            0.3.6.9000  2023-07-25 [1] Github (karthik/wesanderson@95d49de)
 withr                  2.5.0       2022-03-03 [1] CRAN (R 4.3.0)
 xfun                   0.40        2023-08-09 [1] CRAN (R 4.3.1)
 XVector                0.40.0      2023-04-25 [1] Bioconductor
 yaml                   2.3.7       2023-01-23 [1] CRAN (R 4.3.0)
 yarrr                  0.1.6       2023-04-23 [1] Github (ndphillips/yarrr@e2e4488)
 zlibbioc               1.46.0      2023-04-25 [1] Bioconductor

 [1] /opt/homebrew/lib/R/4.3/site-library
 [2] /opt/homebrew/Cellar/r/4.3.1/lib/R/library

─ Python configuration ───────────────────────────────────────────────────────
 python:         /Users/jialei/.pyenv/shims/python
 libpython:      /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/libpython3.10.dylib
 pythonhome:     /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3:/Users/jialei/.pyenv/versions/mambaforge-22.9.0-3
 version:        3.10.9 | packaged by conda-forge | (main, Feb  2 2023, 20:26:08) [Clang 14.0.6 ]
 numpy:          /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
 numpy_version:  1.24.3
 numpy:          /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
 
 NOTE: Python version was forced by RETICULATE_PYTHON

──────────────────────────────────────────────────────────────────────────────

Citation

BibTeX citation:

@article{bhattacharyya2023,
  author = {Bhattacharyya, Samadrita and Duan, Jialei and J. Vela, Ryan
    and Bhakta, Minoti and Bajona, Pietro and P.A. Mammen, Pradeep and
    C. Hon, Gary and V. Munshi, Nikhil},
  publisher = {American Heart Association},
  title = {Accurate {Classification} of {Cardiomyopathy} {Diagnosis} by
    {Chromatin} {Accessibility}},
  journal = {Circulation},
  volume = {146},
  number = {11},
  pages = {878 - 881},
  date = {2023-08-12},
  url = {https://doi.org/10.1161/CIRCULATIONAHA.122.059659},
  doi = {10.1161/CIRCULATIONAHA.122.059659},
  issn = {0009-7322},
  langid = {en},
  abstract = {This study establishes proof-of-concept for a
    cardiomyopathy diagnostic algorithm using chromatin accessibility
    signatures at a sequencing depth achievable by benchtop
    instruments.}
}

For attribution, please cite this work as:

Bhattacharyya, Samadrita, Jialei Duan, Ryan J. Vela, Minoti Bhakta, Pietro Bajona, Pradeep P.A. Mammen, Gary C. Hon, and Nikhil V. Munshi. 2023. “Accurate Classification of Cardiomyopathy Diagnosis by Chromatin Accessibility.” Circulation 146 (11): 878–81. https://doi.org/10.1161/CIRCULATIONAHA.122.059659.