Dataset Summary

Published

Sat Aug 12, 2023 07:27:51-05:00


[1] "2023-08-12 07:27:49 CDT"
[1] "America/Chicago"


Code
# Dataset Summary: one row per category holding the group medians of the read
# counts and of the per-sample retention ratios, rendered as a gt table.
# Everything is evaluated inside `local()` so the helper objects below do not
# persist in the calling environment; the table is the value of the block.
local({
    # Factor levels for `category`; `arrange()` below sorts rows in this order.
    category_levels <- c("Healthy", "ICM", "NICM", "HCM")
    category_palette <- as.character(yarrr::piratepal(palette = "google"))

    # Labels shared by every count / percentage column.
    count_label <- gt::html("#")
    percent_label <- gt::html("%")

    # Footnote text (markdown) attached to each percentage column label.
    column_footnotes <- c(
        median_reads_uniq_percentage =
            "Percentage of `Trimmed` reads",
        median_reads_uniq_percentage_all =
            "Percentage of total `Raw` reads",
        median_reads_deduped_percentage =
            "Percentage of `Unique alignment` reads",
        median_reads_deduped_percentage_all =
            "Percentage of total `Raw` reads",
        median_reads_q10_percentage =
            "Percentage of `Duplicate removal` reads",
        median_reads_q10_percentage_all =
            "Percentage of total `Raw` reads"
    )

    # Per-sample ratios. `*_percentage` divides by the count it is derived
    # from (trimmed -> unique -> de-duplicated); `*_percentage_all` divides
    # by the raw read count.
    per_sample <- dplyr::mutate(
        dplyr::group_by(dataset, category),
        num_reads_uniq_percentage = num_reads_uniq / num_reads_trimmed,
        num_reads_uniq_percentage_all = num_reads_uniq / num_reads_raw,
        num_reads_deduped_percentage = num_reads_deduped / num_reads_uniq,
        num_reads_deduped_percentage_all = num_reads_deduped / num_reads_raw,
        num_reads_q10_percentage = num_reads_q10 / num_reads_deduped,
        num_reads_q10_percentage_all = num_reads_q10 / num_reads_raw
    )

    # Collapse to one row per category (group medians), then order the rows.
    per_category <- per_sample |>
        dplyr::summarise(
            median_reads = median(num_reads_raw),
            median_reads_trimmed = median(num_reads_trimmed),
            median_reads_uniq = median(num_reads_uniq),
            median_reads_uniq_percentage = median(num_reads_uniq_percentage),
            median_reads_uniq_percentage_all = median(
                num_reads_uniq_percentage_all
            ),
            median_reads_deduped = median(num_reads_deduped),
            median_reads_deduped_percentage = median(
                num_reads_deduped_percentage
            ),
            median_reads_deduped_percentage_all = median(
                num_reads_deduped_percentage_all
            ),
            median_reads_q10 = median(num_reads_q10),
            median_reads_q10_percentage = median(num_reads_q10_percentage),
            median_reads_q10_percentage_all = median(
                num_reads_q10_percentage_all
            )
        ) |>
        dplyr::mutate(
            category = factor(category, levels = category_levels)
        ) |>
        dplyr::arrange(category)

    # Table skeleton: title, column spanners and the spanner-level footnote.
    summary_table <- gt::gt(per_category) |>
        gt::tab_header(title = gt::md("**Dataset Summary**")) |>
        gt::tab_spanner(
            label = "No. reads",
            columns = c(median_reads, median_reads_trimmed)
        ) |>
        gt::tab_spanner(
            label = "Unique alignment",
            columns = c(
                median_reads_uniq,
                median_reads_uniq_percentage,
                median_reads_uniq_percentage_all
            )
        ) |>
        gt::tab_spanner(
            label = "Q10",
            columns = c(
                median_reads_q10,
                median_reads_q10_percentage,
                median_reads_q10_percentage_all
            )
        ) |>
        gt::tab_spanner(
            label = "Duplicate removal",
            columns = c(
                median_reads_deduped,
                median_reads_deduped_percentage,
                median_reads_deduped_percentage_all
            )
        ) |>
        gt::tab_footnote(
            footnote = gt::md("Median of each group"),
            locations = gt::cells_column_spanners(
                spanners = c(
                    "No. reads",
                    "Unique alignment",
                    "Duplicate removal",
                    "Q10"
                )
            )
        )

    # One footnote per percentage column, added in the order listed above.
    for (column_name in names(column_footnotes)) {
        summary_table <- gt::tab_footnote(
            summary_table,
            footnote = gt::md(column_footnotes[[column_name]]),
            locations = gt::cells_column_labels(
                columns = dplyr::all_of(column_name)
            )
        )
    }

    # Formatting, colouring, labels and text size.
    # NOTE(review): `colors =` in `gt::data_color()` appears to be deprecated
    # in newer gt releases in favour of `fn =` -- confirm against the
    # installed gt version before migrating.
    summary_table |>
        gt::fmt_number(
            columns = c(
                median_reads,
                median_reads_trimmed,
                median_reads_uniq,
                median_reads_deduped,
                median_reads_q10
            ),
            decimals = 2,
            suffixing = TRUE
        ) |>
        gt::fmt_percent(
            columns = c(
                median_reads_uniq_percentage,
                median_reads_uniq_percentage_all,
                median_reads_deduped_percentage,
                median_reads_deduped_percentage_all,
                median_reads_q10_percentage,
                median_reads_q10_percentage_all
            ),
            decimals = 1
        ) |>
        gt::data_color(
            columns = c(category),
            colors = scales::col_factor(
                palette = category_palette,
                domain = NULL
            )
        ) |>
        gt::cols_label(
            category = "",
            median_reads = "Raw",
            median_reads_trimmed = "Trimmed",
            median_reads_uniq = count_label,
            median_reads_uniq_percentage = percent_label,
            median_reads_uniq_percentage_all = percent_label,
            median_reads_deduped = count_label,
            median_reads_deduped_percentage = percent_label,
            median_reads_deduped_percentage_all = percent_label,
            median_reads_q10 = count_label,
            median_reads_q10_percentage = percent_label,
            median_reads_q10_percentage_all = percent_label
        ) |>
        gt::opt_footnote_marks(marks = "extended") |>
        # No option overrides are passed here.
        gt::tab_options() |>
        gt::tab_style(
            style = gt::cell_text(size = gt::pct(85)),
            locations = list(
                gt::cells_column_spanners(spanners = gt::everything()),
                gt::cells_body(),
                gt::cells_column_labels(),
                gt::cells_footnotes()
            )
        )
})
Dataset Summary
No. reads* Unique alignment* Duplicate removal* Q10*
Raw Trimmed # %† %‡ # %§ %‡ # %‖ %‡
Healthy 74.33M 74.19M 73.37M 98.7% 98.6% 48.12M 67.8% 66.9% 43.51M 90.9% 60.5%
ICM 75.58M 75.45M 74.25M 98.7% 98.6% 44.91M 54.2% 53.4% 40.67M 90.6% 48.2%
NICM 74.85M 74.71M 73.32M 98.4% 98.3% 45.81M 62.8% 61.6% 41.42M 90.4% 55.6%
HCM 72.09M 71.96M 71.01M 98.8% 98.6% 51.36M 75.1% 74.0% 46.50M 90.4% 66.9%
* Median of each group
† Percentage of Trimmed reads
‡ Percentage of total Raw reads
§ Percentage of Unique alignment reads
‖ Percentage of Duplicate removal reads
Code
# Sample Overview: one row per sample with raw and Q10 read counts and the
# Q10 / raw ratio, plus an "Average" grand-summary row, rendered as a gt
# table. Evaluated inside `local()` so the helper objects below do not persist
# in the calling environment; the table is the value of the block.
local({
    # Factor levels for `category`.
    category_levels <- c("Healthy", "ICM", "NICM", "HCM")
    category_palette <- as.character(yarrr::piratepal(palette = "google"))

    # Summary function shared by both grand-summary calls (column mean).
    average_row <- c(
        Average = ~ mean(.)
    )

    # Keep only the displayed columns and add the Q10 / raw ratio.
    per_sample <- dataset |>
        dplyr::select(sample, category, num_reads_raw, num_reads_q10) |>
        dplyr::mutate(
            category = factor(category, levels = category_levels),
            ratio = num_reads_q10 / num_reads_raw
        )

    # NOTE(review): `formatter =` (grand_summary_rows) and `colors =`
    # (data_color) appear to be deprecated in newer gt releases in favour of
    # `fmt =` and `fn =` -- confirm against the installed gt version before
    # migrating.
    gt::gt(per_sample) |>
        # Average of the read-count columns, formatted like the body cells.
        gt::grand_summary_rows(
            columns = c(num_reads_raw, num_reads_q10),
            fns = average_row,
            formatter = gt::fmt_number,
            decimals = 2,
            suffixing = TRUE
        ) |>
        # Average of the per-sample ratios, shown as a percentage.
        gt::grand_summary_rows(
            columns = ratio,
            fns = average_row,
            formatter = gt::fmt_percent,
            decimals = 2
        ) |>
        gt::tab_spanner(
            label = gt::md("**No. reads**"),
            columns = c(num_reads_raw, num_reads_q10)
        ) |>
        gt::tab_header(title = gt::md("**Sample Overview**")) |>
        # Cell colouring: categorical for `category`, continuous for the rest.
        gt::data_color(
            columns = c(category),
            colors = scales::col_factor(
                palette = category_palette,
                domain = NULL
            )
        ) |>
        gt::data_color(
            columns = c(num_reads_raw),
            colors = scales::col_numeric(
                palette = c("green", "orange", "red"),
                domain = NULL
            )
        ) |>
        gt::data_color(
            columns = c(ratio),
            colors = scales::col_numeric(
                palette = viridis::viridis(n = 5, option = "F"),
                domain = NULL
            )
        ) |>
        gt::fmt_number(
            columns = c(num_reads_raw, num_reads_q10),
            decimals = 2,
            suffixing = TRUE
        ) |>
        gt::fmt_percent(
            columns = c(ratio),
            decimals = 1
        ) |>
        gt::cols_label(
            sample = gt::md("**Sample**"),
            category = gt::md("Category"),
            num_reads_raw = gt::md("Raw"),
            num_reads_q10 = gt::md("Q10"),
            ratio = gt::md("Percentage")
        ) |>
        gt::tab_options(
            table.background.color = NULL,
            grand_summary_row.background.color = "lightblue"
        ) |>
        gt::tab_style(
            style = gt::cell_text(size = gt::pct(95)),
            locations = list(
                gt::cells_column_spanners(spanners = gt::everything()),
                gt::cells_body(),
                gt::cells_column_labels()
            )
        )
})
Sample Overview
Sample Category No. reads Percentage
Raw Q10
F1_1 Healthy 60.96M 39.41M 64.6%
F1_2 Healthy 57.97M 39.01M 67.3%
F2_1 Healthy 54.16M 36.17M 66.8%
F2_2 Healthy 143.93M 80.99M 56.3%
F5_1 Healthy 55.53M 37.15M 66.9%
F5_2 Healthy 64.04M 46.27M 72.2%
P3_1 Healthy 78.51M 43.62M 55.6%
P3_2 Healthy 80.16M 42.48M 53.0%
P5_1 Healthy 100.59M 44.28M 44.0%
P5_2 Healthy 99.93M 43.40M 43.4%
P6_1 Healthy 74.43M 45.71M 61.4%
P6_2 Healthy 74.22M 44.25M 59.6%
P104a_1 ICM 61.77M 30.83M 49.9%
P104a_2 ICM 71.35M 32.92M 46.1%
P117b_1 ICM 66.44M 41.34M 62.2%
P117b_2 ICM 76.84M 46.66M 60.7%
P123b_1 ICM 80.77M 52.35M 64.8%
P123b_2 ICM 82.55M 54.31M 65.8%
P131a_1 ICM 88.48M 39.99M 45.2%
P131a_2 ICM 68.51M 29.84M 43.6%
P92a_1 ICM 99.68M 45.23M 45.4%
P92a_2 ICM 74.32M 34.56M 46.5%
P114b_1 NICM 69.49M 37.01M 53.3%
P114b_2 NICM 74.18M 39.39M 53.1%
P59a_1 NICM 72.52M 47.01M 64.8%
P59a_2 NICM 75.52M 53.73M 71.1%
P60a_1 NICM 75.84M 40.87M 53.9%
P60a_2 NICM 77.98M 40.17M 51.5%
P73a_1 NICM 109.71M 57.57M 52.5%
P73a_2 NICM 81.45M 46.64M 57.3%
P87a_1 NICM 68.50M 41.96M 61.3%
P87a_2 NICM 52.51M 30.55M 58.2%
HOCM4_1 HCM 63.49M 44.04M 69.4%
HOCM4_2 HCM 64.41M 43.67M 67.8%
HOCM6_1 HCM 64.87M 37.19M 57.3%
HOCM6_2 HCM 80.75M 47.82M 59.2%
HOCM9_1 HCM 81.73M 60.28M 73.8%
HOCM9_2 HCM 74.96M 49.82M 66.5%
HOCM11_1 HCM 65.22M 45.17M 69.3%
HOCM11_2 HCM 77.67M 52.24M 67.3%
HOCM7_1 HCM 69.23M 42.03M 60.7%
HOCM7_2 HCM 80.77M 48.10M 59.6%
Average 76.09M 44.19M 58.79%