Data preparation

SEED_USE <- 20200616
MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE <- 1 # 60

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

Data loading

Prepare metadata for single cells.

# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE138688
geo_info <- tibble::tribble(
    ~sample_name,                ~sample_annotation,         ~sample_annotation2, ~cell_line,
    "GSM4116149",      "H9 primed TSC Bulk RNA-seq", "Primed hPSC (hTSC medium)",       "H9",
    "GSM4116150",      "AN primed TSC Bulk RNA-seq", "Primed hPSC (hTSC medium)",       "AN",
    "GSM4116151",          "H9 primed Bulk RNA-seq",               "Primed hPSC",       "H9",
    "GSM4116152",          "AN primed Bulk RNA-seq",               "Primed hPSC",       "AN",
    "GSM4116153",  "H9 primed StemPro Bulk RNA-seq",            "Re-primed hPSC",       "H9",
    "GSM4116154",   "H9 capacitated 1 Bulk RNA-seq",          "Capacitated hPSC",       "H9",
    "GSM4116155",   "H9 capacitated 2 Bulk RNA-seq",          "Capacitated hPSC",       "H9",
    "GSM4116156",           "H9 naïve Bulk RNA-seq",                "Naïve hPSC",       "H9",
    "GSM4116157",           "AN naïve Bulk RNA-seq",                "Naïve hPSC",       "AN",
    "GSM4116158",       "H9 naïve TSC Bulk RNA-seq",                "Naïve hTSC",       "H9",
    "GSM4116159",       "AN naïve TSC Bulk RNA-seq",                "Naïve hTSC",       "AN",
    "GSM4116160",    "WIBR3 naïve TSC Bulk RNA-seq",                "Naïve hTSC",    "WIBR3",
    "GSM4116161",             "H9 EVT Bulk RNA-seq",                       "EVT",       "H9",
    "GSM4116162",             "AN EVT Bulk RNA-seq",                       "EVT",       "AN",
    "GSM4116163",             "H9 STB Bulk RNA-seq",                       "STB",       "H9",
    "GSM4116164",             "AN STB Bulk RNA-seq",                       "STB",       "AN",
    "GSM4276363", "H9 naïve TSC rep 2 Bulk RNA-seq",                "Naïve hTSC",       "H9",
    "GSM4276364", "AN naïve TSC rep 2 Bulk RNA-seq",                "Naïve hTSC",       "AN",
    "GSM4276365",     "BT5 hTSC rep 1 Bulk RNA-seq",                 "BT5 hTSCs",      "BT5",
    "GSM4276366",     "BT5 hTSC rep 2 Bulk RNA-seq",                 "BT5 hTSCs",      "BT5"
)

cell_metadata <- read_delim(
    file = "../SraRunTable.txt",
    delim = ","
) %>%
    rename_all(. %>% tolower()) %>%
    `colnames<-`(str_replace(
        string = colnames(.),
        pattern = " ", replacement = "_"
    )) %>%
    select(
        sample_name,
        run,
        biosample,
        source_name
    ) %>%
    mutate(
        source_name = str_remove(string = source_name, pattern = "\\s.+$"),
        source_name = factor(
            source_name,
            levels = c("hPSC", "hTSC", "EVT", "STB"),
        )
    ) %>%
    left_join(geo_info) %>%
    mutate(cell_line = factor(cell_line))

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   AvgSpotLen = col_double(),
##   Bases = col_double(),
##   Bytes = col_double(),
##   ReleaseDate = col_datetime(format = "")
## )

## See spec(...) for full column specifications.

## Joining, by = "sample_name"

cell_metadata

Load featureCounts output.

matrix_readcount_use <- read_delim(
    file = "../matrix/xaa_Aligned.sortedByCoord.out_deduped_q10_gene_id_featureCounts.txt.gz",
    delim = "\t",
    col_names = TRUE,
    skip = 1
) %>%
    select(-(2:6))

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Geneid = col_character(),
##   Chr = col_character(),
##   Start = col_character(),
##   End = col_character(),
##   Strand = col_character()
## )

## See spec(...) for full column specifications.

colnames(matrix_readcount_use) <- colnames(matrix_readcount_use) %>%
    str_remove(
        pattern = "_rnaseq/aln/Aligned.sortedByCoord.out_deduped.+.bam"
    ) %>%
    str_remove(pattern = "../aln/")

matrix_readcount_use <- Matrix(
    data = as.matrix(matrix_readcount_use[, -1]),
    dimnames = list(
        matrix_readcount_use$Geneid,
        colnames(matrix_readcount_use)[-1]
    ),
    sparse = TRUE
)

matrix_readcount_use <- matrix_readcount_use[, colnames(matrix_readcount_use)]
matrix_readcount_use %>%
    dim()

## [1] 33538    20

stopifnot(
    rownames(matrix_readcount_use) == gene_symbo_info$X1
)
rownames(matrix_readcount_use) <- paste(
    rownames(matrix_readcount_use),
    gene_symbo_info$X2,
    sep = "_"
)

colnames(matrix_readcount_use) <- colnames(matrix_readcount_use) %>%
    enframe(value = "run") %>%
    left_join(cell_metadata, by = c("run" = "run")) %>%
    pull("sample_name")


matrix_cpm_use <- calc_cpm(m = matrix_readcount_use)

Preprocessing

Summarize sequencing statistics.

cell_metadata %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(sample_annotation) %>%
    summarise(
        num_cells = n(),
        num_umis = (num_umis),
        num_genes = (num_genes)
    ) %>%
    arrange(sample_annotation) %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

## `summarise()` ungrouping output (override with `.groups` argument)

sample_annotation	num_cells	num_umis	num_genes
AN EVT Bulk RNA-seq	1	8209039	19534
AN naïve Bulk RNA-seq	1	7666193	20839
AN naïve TSC Bulk RNA-seq	1	9752099	20212
AN naïve TSC rep 2 Bulk RNA-seq	1	16382259	21398
AN primed Bulk RNA-seq	1	9094100	20799
AN primed TSC Bulk RNA-seq	1	8236756	20098
AN STB Bulk RNA-seq	1	7353114	19141
BT5 hTSC rep 1 Bulk RNA-seq	1	16278669	21041
BT5 hTSC rep 2 Bulk RNA-seq	1	14363407	20488
H9 capacitated 1 Bulk RNA-seq	1	9243528	22011
H9 capacitated 2 Bulk RNA-seq	1	7636895	22550
H9 EVT Bulk RNA-seq	1	9821122	21067
H9 naïve Bulk RNA-seq	1	8245629	21031
H9 naïve TSC Bulk RNA-seq	1	8580435	19314
H9 naïve TSC rep 2 Bulk RNA-seq	1	17392727	21148
H9 primed Bulk RNA-seq	1	9129076	21250
H9 primed StemPro Bulk RNA-seq	1	9307244	21640
H9 primed TSC Bulk RNA-seq	1	11135121	21458
H9 STB Bulk RNA-seq	1	6521579	18933
WIBR3 naïve TSC Bulk RNA-seq	1	11118083	20274

Filter uninformative genes.

matrix_readcount_norm <- matrix_readcount_use
matrix_readcount_norm <- matrix_readcount_norm[
    Matrix::rowSums(
        matrix_readcount_norm
    ) >= MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE,
]
dim(matrix_readcount_norm)

## [1] 29438    20

Median normalize matrix.

matrix_readcount_norm@x <- median(colSums(matrix_readcount_norm)) *
    (matrix_readcount_norm@x / rep.int(
        colSums(matrix_readcount_norm),
        diff(matrix_readcount_norm@p)
    ))

Dimensionality reduction

PCA

matrix_readcount_norm_log <- matrix_readcount_norm
matrix_readcount_norm_log@x <- log1p(matrix_readcount_norm_log@x)

# z-score
matrix_readcount_norm_log_scaled <- t(
    scale(
        t(
            matrix_readcount_norm_log
        ),
        center = TRUE, scale = TRUE
    )
)
# features_var <- apply(matrix_readcount_norm_log_scaled, 1, var)
# features_use <- rownames(matrix_readcount_norm_log_scaled)[features_var > 0]

pca_out <- prcomp(
    t(matrix_readcount_norm_log_scaled),
    center = FALSE,
    scale = FALSE
)

summary(pca_out)$imp %>%
    t() %>%
    as.data.frame() %>%
    rownames_to_column(var = "component") %>%
    mutate(
        rank = 1:n()
    ) %>%
    slice(1:30) %>%
    ggplot(
        aes(
            x = rank,
            y = `Proportion of Variance`
        )
    ) +
    geom_point(size = 0.3) +
    theme_bw() +
    scale_x_continuous(
        name = "Component",
        breaks = c(1, seq(5, 30, 5))
    ) +
    scale_y_continuous(
        name = "Proportion of variance",
        labels = scales::percent
    ) +
    theme(
        axis.title = ggplot2::element_text(family = "Arial", size = 6),
        axis.text = ggplot2::element_text(family = "Arial", size = 6),
        panel.grid.minor = ggplot2::element_blank()
    )

file_name <- "Rplot_pca_variance_explained.pdf"
if (!file.exists(file_name)) {
    ggsave(
        filename = file_name,
        useDingbats = FALSE,
        plot = last_plot(),
        device = NULL,
        path = NULL,
        scale = 1,
        width = 55 * 1.5,
        height = 55,
        units = c("mm"),
    )
}

embedding <- pca_out$x %>%
    as.data.frame() %>%
    rownames_to_column(var = "sample_name") %>%
    left_join(cell_metadata) %>%
    select(
        sample_name,
        run,
        biosample,
        sample_annotation,
        sample_annotation2,
        source_name,
        cell_line,
        PC1,
        PC2,
        PC3
    ) %>%
    rename(
        x_pca = PC1,
        y_pca = PC2,
        z_pca = PC3
    )

## Joining, by = "sample_name"

p_embedding_pca1 <- embedding %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = y_pca,
        color = source_name,
        shape = cell_line
    ) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    add_xy_label_pca()

p_embedding_pca2 <- embedding %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = z_pca,
        color = source_name,
        shape = cell_line
    ) +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    ) +
    add_xy_label_pca(y = "PC3")

p_embedding_pca1 +
    p_embedding_pca2 +
    plot_annotation(theme = theme(plot.margin = margin())) +
    plot_layout(guides = "collect")

file_name <- "Rplot_embedding_pca.pdf"
if (!file.exists(file_name)) {
    ggsave(
        filename = file_name,
        useDingbats = FALSE,
        plot = last_plot(),
        device = NULL,
        path = NULL,
        scale = 1,
        width = 55 * 2.5,
        height = 55,
        units = c("mm"),
    )
}

t-SNE

N_COMPONENTS <- 15

set.seed(seed = SEED_USE)
embedding_rtsne <- Rtsne::Rtsne(
    X = pca_out$x[, 1:N_COMPONENTS],
    perplexity = 5,
    check_duplicates = FALSE,
    pca = FALSE,
    max_iter = 3000,
    verbose = TRUE
)$Y

## Read the 20 x 15 data matrix successfully!
## Using no_dims = 2, perplexity = 5.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.880000)!
## Learning embedding...
## Iteration 50: error is 66.679321 (50 iterations in 0.00 seconds)
## Iteration 100: error is 61.998136 (50 iterations in 0.00 seconds)
## Iteration 150: error is 60.044322 (50 iterations in 0.00 seconds)
## Iteration 200: error is 60.585197 (50 iterations in 0.00 seconds)
## Iteration 250: error is 55.630989 (50 iterations in 0.00 seconds)
## Iteration 300: error is 1.199230 (50 iterations in 0.00 seconds)
## Iteration 350: error is 0.458094 (50 iterations in 0.00 seconds)
## Iteration 400: error is 0.730441 (50 iterations in 0.00 seconds)
## Iteration 450: error is 0.392958 (50 iterations in 0.00 seconds)
## Iteration 500: error is 0.203769 (50 iterations in 0.00 seconds)
## Iteration 550: error is 0.194038 (50 iterations in 0.00 seconds)
## Iteration 600: error is 0.183251 (50 iterations in 0.00 seconds)
## Iteration 650: error is 0.166762 (50 iterations in 0.00 seconds)
## Iteration 700: error is 0.145477 (50 iterations in 0.00 seconds)
## Iteration 750: error is 0.117679 (50 iterations in 0.00 seconds)
## Iteration 800: error is 0.099077 (50 iterations in 0.00 seconds)
## Iteration 850: error is 0.083525 (50 iterations in 0.00 seconds)
## Iteration 900: error is 0.084415 (50 iterations in 0.00 seconds)
## Iteration 950: error is 0.081729 (50 iterations in 0.00 seconds)
## Iteration 1000: error is 0.076858 (50 iterations in 0.00 seconds)
## Iteration 1050: error is 0.081037 (50 iterations in 0.00 seconds)
## Iteration 1100: error is 0.084826 (50 iterations in 0.00 seconds)
## Iteration 1150: error is 0.088820 (50 iterations in 0.00 seconds)
## Iteration 1200: error is 0.082442 (50 iterations in 0.00 seconds)
## Iteration 1250: error is 0.077465 (50 iterations in 0.00 seconds)
## Iteration 1300: error is 0.084000 (50 iterations in 0.00 seconds)
## Iteration 1350: error is 0.082580 (50 iterations in 0.00 seconds)
## Iteration 1400: error is 0.083636 (50 iterations in 0.00 seconds)
## Iteration 1450: error is 0.087216 (50 iterations in 0.00 seconds)
## Iteration 1500: error is 0.083618 (50 iterations in 0.00 seconds)
## Iteration 1550: error is 0.078080 (50 iterations in 0.00 seconds)
## Iteration 1600: error is 0.072908 (50 iterations in 0.00 seconds)
## Iteration 1650: error is 0.079053 (50 iterations in 0.00 seconds)
## Iteration 1700: error is 0.077303 (50 iterations in 0.00 seconds)
## Iteration 1750: error is 0.075094 (50 iterations in 0.00 seconds)
## Iteration 1800: error is 0.088803 (50 iterations in 0.00 seconds)
## Iteration 1850: error is 0.086535 (50 iterations in 0.00 seconds)
## Iteration 1900: error is 0.080961 (50 iterations in 0.00 seconds)
## Iteration 1950: error is 0.087323 (50 iterations in 0.00 seconds)
## Iteration 2000: error is 0.088305 (50 iterations in 0.00 seconds)
## Iteration 2050: error is 0.079066 (50 iterations in 0.00 seconds)
## Iteration 2100: error is 0.074569 (50 iterations in 0.00 seconds)
## Iteration 2150: error is 0.089101 (50 iterations in 0.00 seconds)
## Iteration 2200: error is 0.085961 (50 iterations in 0.00 seconds)
## Iteration 2250: error is 0.081141 (50 iterations in 0.00 seconds)
## Iteration 2300: error is 0.087996 (50 iterations in 0.00 seconds)
## Iteration 2350: error is 0.081587 (50 iterations in 0.00 seconds)
## Iteration 2400: error is 0.079477 (50 iterations in 0.00 seconds)
## Iteration 2450: error is 0.087065 (50 iterations in 0.00 seconds)
## Iteration 2500: error is 0.083037 (50 iterations in 0.00 seconds)
## Iteration 2550: error is 0.087950 (50 iterations in 0.00 seconds)
## Iteration 2600: error is 0.088529 (50 iterations in 0.00 seconds)
## Iteration 2650: error is 0.078429 (50 iterations in 0.00 seconds)
## Iteration 2700: error is 0.090716 (50 iterations in 0.00 seconds)
## Iteration 2750: error is 0.087941 (50 iterations in 0.00 seconds)
## Iteration 2800: error is 0.075762 (50 iterations in 0.00 seconds)
## Iteration 2850: error is 0.088322 (50 iterations in 0.00 seconds)
## Iteration 2900: error is 0.085741 (50 iterations in 0.00 seconds)
## Iteration 2950: error is 0.085754 (50 iterations in 0.00 seconds)
## Iteration 3000: error is 0.088691 (50 iterations in 0.00 seconds)
## Fitting performed in 0.05 seconds.

embedding <- cbind(
    embedding,
    embedding_rtsne
) %>%
    rename(
        x_tsne = "1",
        y_tsne = "2"
    )

p_embedding_tsne <- embedding %>%
    mutate(
        category = "t-SNE"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_tsne,
        y = y_tsne,
        color = source_name,
        shape = cell_line
    ) +
    labs(x = "Dim 1", y = "Dim 2") +
    scale_color_manual(
        values = as.character(yarrr::piratepal(palette = "google"))
    )

embedding %>%
    dplyr::count(sample_annotation2) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

sample_annotation2	num_cells
BT5 hTSCs	2
Capacitated hPSC	2
EVT	2
Naïve hPSC	2
Naïve hTSC	5
Primed hPSC	2
Primed hPSC (hTSC medium)	2
Re-primed hPSC	1
STB	2

Fig. 4A
Inlcuding three types of hPSCs (naïve, capacitated, and primed), blastocyst-derived BT5 hTSCs, naïve hTSCs and their differentiated progeny (EVT and STB), and primed hPSCs cultured in hTSC media.

p_embedding_pca1 + geom_text_repel(
    data = embedding,
    aes(x_pca,
        y_pca,
        label = sample_annotation2
    ),
    size = 1.8,
    family = "Arial",
    box.padding = .2,
    point.padding = .2,
    nudge_y = .15,
    arrow = arrow(length = unit(.02, "npc")),
    segment.color = "grey35",
    color = "black"
) +
    p_embedding_tsne + geom_text_repel(
        data = embedding,
        aes(x_tsne,
            y_tsne,
            label = sample_annotation2
        ),
        size = 1.8,
        family = "Arial",
        box.padding = .2,
        point.padding = .2,
        nudge_y = .15,
        arrow = arrow(length = unit(.02, "npc")),
        segment.color = "grey35",
        color = "black"
    ) +
    plot_annotation(theme = theme(plot.margin = margin())) +
    plot_layout(nrow = 1, guides = "collect")

file_name <- "Rplot_embedding.pdf"
if (!file.exists(file_name)) {
    ggsave(
        filename = file_name,
        useDingbats = FALSE,
        plot = last_plot(),
        device = NULL,
        path = NULL,
        scale = 1,
        width = 55 * 2.5,
        height = 55,
        units = c("mm"),
    )
}

Expression

Barplot

Part1

embedding %>%
    dplyr::count(source_name) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

source_name	num_cells
hPSC	9
hTSC	7
EVT	2
STB	2

Fig. 4D

FEATURES_SELECTED <- c(
    "ENSG00000204531_POU5F1",
    "ENSG00000203909_DPPA5",
    "ENSG00000111704_NANOG",
    # "ENSG00000107485_GATA3",
    "ENSG00000197905_TEAD4",
    "ENSG00000162337_LRP5",
    "ENSG00000073282_TP63",
    "ENSG00000135374_ELF5",
    "ENSG00000126353_CCR7",
    "ENSG00000213949_ITGA1",
    "ENSG00000204632_HLA-G",
    "ENSG00000087245_MMP2",
    "ENSG00000123999_INHA",
    "ENSG00000231924_PSG1",
    "ENSG00000267631_CGB1",
    "ENSG00000140443_IGF1R",
    "ENSG00000197081_IGF2R",
    "ENSG00000135480_KRT7",
    "ENSG00000165556_CDX2"
)

map(FEATURES_SELECTED, function(x) {
    embedding %>%
        mutate(feature_value = matrix_cpm_use[x, sample_name]) %>%
        group_by(source_name) %>%
        summarise(mean_expr = mean(feature_value + 1)) %>%
        plot_barplot_feature(
            x = source_name,
            y = mean_expr,
            title = str_replace(string = x, pattern = "_", replacement = "\n")
        ) +
        scale_fill_manual(
            values = as.character(yarrr::piratepal(palette = "google"))
        )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 3) +
    plot_annotation(
        tag_levels = c("A"),
        theme = theme(plot.margin = margin())
    ) &
    theme(
        plot.tag.position = c(0, 1),
        plot.tag = element_text(
            family = "Arial",
            size = 7,
            hjust = 0,
            vjust = 0
        )
    )

## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)

Part2

embedding %>%
    dplyr::count(sample_annotation2) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

sample_annotation2	num_cells
BT5 hTSCs	2
Capacitated hPSC	2
EVT	2
Naïve hPSC	2
Naïve hTSC	5
Primed hPSC	2
Primed hPSC (hTSC medium)	2
Re-primed hPSC	1
STB	2

embedding %>%
    dplyr::count(cell_line) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

cell_line	num_cells
AN	7
BT5	2
H9	10
WIBR3	1

embedding %>%
    dplyr::count(cell_line, source_name) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

cell_line	source_name	num_cells
AN	hPSC	3
AN	hTSC	2
AN	EVT	1
AN	STB	1
BT5	hTSC	2
H9	hPSC	6
H9	hTSC	2
H9	EVT	1
H9	STB	1
WIBR3	hTSC	1

map(FEATURES_SELECTED, function(x) {
    embedding %>%
        mutate(feature_value = matrix_cpm_use[x, sample_name]) %>%
        group_by(cell_line, source_name) %>%
        summarise(mean_expr = mean(feature_value + 1)) %>%
        filter(cell_line %in% c("AN", "H9")) %>%
        plot_barplot_feature(
            x = source_name,
            y = mean_expr,
            z = "cell_line",
            title = x
        ) +
        scale_fill_manual(
            values = as.character(yarrr::piratepal(palette = "google"))
        )
}) %>%
    purrr::reduce(`+`) +
    plot_layout(ncol = 2) +
    plot_annotation(
        tag_levels = c("A"),
        theme = theme(plot.margin = margin())
    ) &
    theme(
        plot.tag.position = c(0, 1),
        plot.tag = element_text(
            family = "Arial",
            size = 7,
            hjust = 0,
            vjust = 0
        )
    )

## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)
## `summarise()` regrouping output by 'cell_line' (override with `.groups` argument)

Primed hPSC vs naïve hTSCs

Primed hPSC in hTSC media vs naïve hTSC

library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:Matrix':
## 
##     which
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:rlang':
## 
##     exprs
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
library("BiocParallel")

# Primed hPSC in hTSC media: GSM4116149, GSM4116150
# naïve hTSC: GSM4116158, GSM4116159, GSM4116160, GSM4276363, GSM4276364

cts <- matrix_readcount_use[, c(
    "GSM4116149",
    "GSM4116150",
    "GSM4116158",
    "GSM4116159",
    "GSM4116160",
    "GSM4276363",
    "GSM4276364"
)]
col_data <- data.frame(
    sample = c(
        "Primed hPSC",
        "Primed hPSC",
        "naïve hTSC",
        "naïve hTSC",
        "naïve hTSC",
        "naïve hTSC",
        "naïve hTSC"
    ),
    row.names = c(
        "GSM4116149",
        "GSM4116150",
        "GSM4116158",
        "GSM4116159",
        "GSM4116160",
        "GSM4276363",
        "GSM4276364"
    )
) %>%
    mutate(
        sample = factor(
            sample,
            levels = c("naïve hTSC", "Primed hPSC")
        )
    )

dds <- DESeqDataSetFromMatrix(
    countData = cts,
    colData = col_data,
    design = ~sample
)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

dds <- DESeq(dds)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing

deseq2_out <- results(dds)
# mcols(deseq2_out)
deseq2_out
## log2 fold change (MLE): sample Primed.hPSC vs naïve.hTSC 
## Wald test p-value: sample Primed.hPSC vs naïve.hTSC 
## DataFrame with 33538 rows and 6 columns
##                              baseMean log2FoldChange     lfcSE       stat
##                             <numeric>      <numeric> <numeric>  <numeric>
## ENSG00000243485_MIR1302-2HG 0.0000000             NA        NA         NA
## ENSG00000237613_FAM138A     0.0000000             NA        NA         NA
## ENSG00000186092_OR4F5       0.0000000             NA        NA         NA
## ENSG00000238009_AL627309.1  0.0887492      -0.281881   4.18125 -0.0674156
## ENSG00000239945_AL627309.3  0.0000000             NA        NA         NA
## ...                               ...            ...       ...        ...
## ENSG00000277856_AC233755.2     0.0000             NA        NA         NA
## ENSG00000275063_AC233755.1     0.0000             NA        NA         NA
## ENSG00000271254_AC240274.1    59.8487       0.607067  0.659072   0.921094
## ENSG00000277475_AC213203.1     0.0000             NA        NA         NA
## ENSG00000268674_FAM231C        0.0000             NA        NA         NA
##                                pvalue      padj
##                             <numeric> <numeric>
## ENSG00000243485_MIR1302-2HG        NA        NA
## ENSG00000237613_FAM138A            NA        NA
## ENSG00000186092_OR4F5              NA        NA
## ENSG00000238009_AL627309.1   0.946251        NA
## ENSG00000239945_AL627309.3         NA        NA
## ...                               ...       ...
## ENSG00000277856_AC233755.2         NA        NA
## ENSG00000275063_AC233755.1         NA        NA
## ENSG00000271254_AC240274.1   0.357001  0.508974
## ENSG00000277475_AC213203.1         NA        NA
## ENSG00000268674_FAM231C            NA        NA

summary(deseq2_out)

## 
## out of 26766 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 4711, 18%
## LFC < 0 (down)     : 4152, 16%
## outliers [1]       : 12, 0.045%
## low counts [2]     : 6656, 25%
## (mean count < 2)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

FEATURES_TO_LABEL <- c(
    "TFAP2C",
    "GATA3",
    "TP63",
    "OVOL1",
    "KRT7",
    "GCM1",
    "CCR7",
    "ERVW-1",
    "CYP19A1",
    "DLX5",
    "ELF5",
    "MUC15",
    # ,
    "NEFM",
    "SOX11",
    "LSAMP",
    "GAP43",
    "MAP2",
    "EPHA3",
    "DCX",
    "VIM",
    "PAX3"
)

FEATURES_TO_LABEL <- rownames(
    matrix_readcount_use
)[gene_symbo_info$X2 %in% FEATURES_TO_LABEL]

deseq2_out %>%
    as.data.frame() %>%
    mutate(
        group = case_when(
            log2FoldChange >= 1 & padj <= 0.05 ~ "High in Primed hPSC",
            log2FoldChange <= -1 & padj <= 0.05 ~ "High in naïve hTSC",
            TRUE ~ "Not significant"
        )
    ) %>%
    plot_volcano(
        x = log2FoldChange,
        y = -log10(padj),
        z = group, xlim = c(-20, 20)
    ) +
    geom_text_repel(
        data = as.data.frame(deseq2_out)[FEATURES_TO_LABEL, ] %>%
            mutate(
                symbol = rownames(.),
                symbol = str_remove(string = symbol, pattern = "^E.+_")
            ),
        aes(log2FoldChange,
            -log10(padj),
            label = symbol
        ),
        size = 1.8,
        family = "Arial",
        color = "black",
        nudge_x = .15,
        box.padding = 0.5,
        nudge_y = 1,
        segment.curvature = -0.1,
        segment.ncp = 3,
        segment.angle = 20
    )

## Warning: Removed 13440 rows containing missing values (geom_point).

# check
SELECTED_FEATURE <- "ENSG00000078018_MAP2"
matrix_cpm_use[SELECTED_FEATURE, c(
    "GSM4116149",
    "GSM4116150",
    "GSM4116159",
    "GSM4116160",
    "GSM4276363",
    "GSM4276364"
)] %>%
    enframe(name = "sample", value = "CPM") %>%
    print(n = nrow(.))

## # A tibble: 6 x 2
##   sample         CPM
##   <chr>        <dbl>
## 1 GSM4116149 377.   
## 2 GSM4116150 528.   
## 3 GSM4116159   0.103
## 4 GSM4116160   0    
## 5 GSM4276363   0    
## 6 GSM4276364   0.183

Comparing with implantation data

Zhou, F., Wang, R., Yuan, P., Ren, Y., Mao, Y., Li, R., Lian, Y., Li, J., Wen, L., Yan, L., et al. (2019). Reconstituting the transcriptome and DNA methylome landscapes of human implantation. Nature 572, 660–664.

embedding_PRJNA431392 <- read_csv(
    file = "../../PRJNA431392/notebooks/embedding_ncomponents8_seed20200317.csv"
) %>%
    left_join(read_csv(
        file = "../../PRJNA431392/Supplementary_Table_2_Sample_Information.csv"
    ) %>%
        select(
            Sample,
            Day,
            IVC,
            Embryo,
            Ori_Day,
            Ori_Day_Emb,
            Sex,
            CNV,
            Lineage
        ) %>%
        rename_all(tolower),
    by = c("cell" = "sample")
    ) %>%
    mutate(
        developmental_stage = str_replace(
            string = day,
            pattern = "D",
            replacement = "E"
        ),
        developmental_stage = factor(
            developmental_stage,
            levels = c(str_sort(unique(developmental_stage), numeric = TRUE))
        ),
        #
        lineage = ifelse(lineage == "MIX", "ysTE", lineage),
        lineage = factor(lineage, levels = c("EPI", "PE", "TE", "ysTE"))
    )
## Parsed with column specification:
## cols(
##   cell = col_character(),
##   batch = col_character(),
##   louvain = col_double(),
##   x_tsne = col_double(),
##   y_tsne = col_double(),
##   x_umap = col_double(),
##   y_umap = col_double(),
##   x_fitsne = col_double(),
##   y_fitsne = col_double(),
##   x_phate = col_double(),
##   y_phate = col_double(),
##   `x_min_dist=0.1` = col_double(),
##   `y_min_dist=0.1` = col_double(),
##   x_multicoretsne = col_double(),
##   y_multicoretsne = col_double()
## )
## Parsed with column specification:
## cols(
##   Sample = col_character(),
##   Day = col_character(),
##   IVC = col_character(),
##   Embryo = col_character(),
##   chrM_Ratio = col_double(),
##   ERCC_Ratio = col_double(),
##   GeneNum = col_double(),
##   Ori_Day = col_character(),
##   Ori_Day_Emb = col_character(),
##   Sex = col_character(),
##   Pseudotime = col_double(),
##   CNV = col_character(),
##   Lineage = col_character()
## )

embedding_PRJNA431392 %>% head()

reticulate::py_discover_config()
## python:         /Users/jialei/.pyenv/shims/python
## libpython:      /Users/jialei/.pyenv/versions/3.8.2/lib/libpython3.8.dylib
## pythonhome:     /Users/jialei/.pyenv/versions/3.8.2:/Users/jialei/.pyenv/versions/3.8.2
## version:        3.8.2 (default, May 23 2020, 03:35:41)  [Clang 11.0.3 (clang-1103.0.32.62)]
## numpy:          /Users/jialei/.pyenv/versions/3.8.2/lib/python3.8/site-packages/numpy
## numpy_version:  1.19.0
## 
## NOTE: Python version was forced by RETICULATE_PYTHON

np <- reticulate::import("numpy", convert = TRUE)
scipy.sparse <- reticulate::import(module = "scipy.sparse", convert = TRUE)

matrix_readcount_PRJNA431392 <- scipy.sparse$load_npz("../../PRJNA431392/matrix_readcount.npz")
matrix_readcount_PRJNA431392_features <- np$load("../../PRJNA431392/matrix_readcount_features.npy")
matrix_readcount_PRJNA431392_barcodes <- np$load("../../PRJNA431392/matrix_readcount_barcodes.npy")
colnames(matrix_readcount_PRJNA431392) <- matrix_readcount_PRJNA431392_barcodes
rownames(matrix_readcount_PRJNA431392) <- matrix_readcount_PRJNA431392_features
matrix_readcount_PRJNA431392 <- matrix_readcount_PRJNA431392[, embedding_PRJNA431392$cell]

# calculate CPM
matrix_cpm_PRJNA431392 <- calc_cpm(matrix_readcount_PRJNA431392)

stopifnot(
    dim(matrix_readcount_PRJNA431392) == dim(matrix_cpm_PRJNA431392)
)
print(dim(matrix_readcount_PRJNA431392))
## [1] 33538  3184

levels(embedding_PRJNA431392$developmental_stage)
## [1] "E6"  "E8"  "E10" "E12"
levels(embedding_PRJNA431392$lineage)
## [1] "EPI"  "PE"   "TE"   "ysTE"
LINEAGES_SELECTED <- c("EPI", "PE", "TE")

matrix_readcount_PRJNA431392_pseudo_bulk <- map(
    levels(embedding_PRJNA431392$developmental_stage), function(x) {
        cat(x, "\n")
        map(LINEAGES_SELECTED, function(y) {
            cells_in_group <- embedding_PRJNA431392 %>%
                filter(
                    developmental_stage == x,
                    lineage == y
                ) %>%
                pull(cell)
            rowSums(matrix_readcount_PRJNA431392[, cells_in_group])
        }) %>%
            bind_cols()
    }
) %>%
    bind_cols()
## E6
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## E8
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## E10
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## E12
## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## New names:
## * ...1 -> ...4
## * ...2 -> ...5
## * ...3 -> ...6
## * ...1 -> ...7
## * ...2 -> ...8
## * ...

colnames(matrix_readcount_PRJNA431392_pseudo_bulk) <- map(
    levels(embedding_PRJNA431392$developmental_stage), function(x) {
        map(LINEAGES_SELECTED, function(y) {
            y
        }) %>%
            unlist() %>%
            paste(x, ., sep = "_")
    }
) %>% unlist()

matrix_readcount_PRJNA431392_pseudo_bulk <- as.matrix(
    matrix_readcount_PRJNA431392_pseudo_bulk
)
rownames(matrix_readcount_PRJNA431392_pseudo_bulk) <- rownames(
    matrix_readcount_use
)

matrix_readcount_pseudo_bulk <- cbind(
    "H9_naïve_hTSC" = matrix_readcount_use[, c("GSM4116158", "GSM4276363")] %>% rowSums(),
    "AN_naïve_hTSC" = matrix_readcount_use[, c("GSM4116159", "GSM4276364")] %>% rowSums(),
    "WIBR3_naïve_hTSC" = matrix_readcount_use[, c("GSM4116160")],
    "BT5_hTSC" = matrix_readcount_use[, c("GSM4276365", "GSM4276366")] %>% rowSums(),
    matrix_readcount_PRJNA431392_pseudo_bulk
)
matrix_cpm_pseudo_bulk <- calc_cpm(matrix_readcount_pseudo_bulk)

corr_heatmap <- matrix_readcount_pseudo_bulk %>%
    cor(log10(. + 1), method = "spearman")
corr_heatmap[upper.tri(corr_heatmap)] <- NA

Fig. 4E
Whole transcriptome correlation.

corr_heatmap %>%
    as.data.frame() %>%
    rownames_to_column(var = "sample_x") %>%
    pivot_longer(
        -c("sample_x"),
        names_to = "sample_y"
    ) %>%
    as.data.frame() %>%
    filter(!is.na(value)) %>%
    mutate(
        sample_x = factor(
            sample_x,
            levels = colnames(matrix_readcount_pseudo_bulk),
        ),
        sample_y = factor(
            sample_y,
            levels = colnames(matrix_readcount_pseudo_bulk)
        )
    ) %>%
    plot_corr_heatmap(
        x = sample_x,
        y = sample_y,
        z = value
    )

R session info

devtools::session_info()$platform

##  setting  value                       
##  version  R version 4.0.2 (2020-06-22)
##  os       macOS Catalina 10.15.6      
##  system   x86_64, darwin19.5.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-08-09

devtools::session_info()$pack %>%
    as_tibble() %>%
    select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    # print(n = nrow(.))
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

package	loadedversion	date	source
annotate	1.66.0	2020-04-27	Bioconductor
AnnotationDbi	1.50.3	2020-07-25	Bioconductor
assertthat	0.2.1	2019-03-21	CRAN (R 4.0.0)
backports	1.1.8	2020-06-17	CRAN (R 4.0.1)
BayesFactor	0.9.12-4.2	2018-05-19	CRAN (R 4.0.2)
Biobase	2.48.0	2020-04-27	Bioconductor
BiocGenerics	0.34.0	2020-04-27	Bioconductor
BiocParallel	1.22.0	2020-04-27	Bioconductor
bit	4.0.4	2020-08-04	CRAN (R 4.0.2)
bit64	4.0.2	2020-07-30	CRAN (R 4.0.2)
bitops	1.0-6	2013-08-17	CRAN (R 4.0.0)
blob	1.2.1	2020-01-20	CRAN (R 4.0.0)
broom	0.7.0.9001	2020-08-09	Github (tidymodels/broom@a0bb105)
callr	3.4.3.9000	2020-07-31	Github (r-lib/callr@b96da8f)
cellranger	1.1.0	2016-07-27	CRAN (R 4.0.0)
checkmate	2.0.0	2020-02-06	CRAN (R 4.0.0)
circlize	0.4.10	2020-06-15	CRAN (R 4.0.1)
cli	2.0.2	2020-02-28	CRAN (R 4.0.0)
coda	0.19-3	2019-07-05	CRAN (R 4.0.0)
colorspace	1.4-1	2019-03-18	CRAN (R 4.0.0)
crayon	1.3.4	2017-09-16	CRAN (R 4.0.0)
DBI	1.1.0	2019-12-15	CRAN (R 4.0.0)
dbplyr	1.4.4.9000	2020-07-28	Github (tidyverse/dbplyr@a6ed629)
DelayedArray	0.14.1	2020-07-14	Bioconductor
desc	1.2.0	2018-05-01	CRAN (R 4.0.0)
DESeq2	1.28.1	2020-05-12	Bioconductor
devtools	2.3.1.9000	2020-07-31	Github (r-lib/devtools@df619ce)
digest	0.6.25	2020-02-23	CRAN (R 4.0.0)
dplyr	1.0.1.9001	2020-08-03	Github (tidyverse/dplyr@dc3e0e6)
ellipsis	0.3.1.9000	2020-07-18	Github (r-lib/ellipsis@57a5071)
evaluate	0.14	2019-05-28	CRAN (R 4.0.0)
extrafont	0.17	2014-12-08	CRAN (R 4.0.2)
extrafontdb	1.0	2012-06-11	CRAN (R 4.0.0)
fansi	0.4.1	2020-01-08	CRAN (R 4.0.0)
farver	2.0.3	2020-01-16	CRAN (R 4.0.0)
forcats	0.5.0.9000	2020-05-28	Github (tidyverse/forcats@ab81d1b)
fs	1.5.0.9000	2020-08-03	Github (r-lib/fs@93e70a9)
genefilter	1.70.0	2020-04-27	Bioconductor
geneplotter	1.66.0	2020-04-27	Bioconductor
generics	0.0.2	2018-11-29	CRAN (R 4.0.0)
GenomeInfoDb	1.24.2	2020-06-15	Bioconductor
GenomeInfoDbData	1.2.3	2020-04-24	Bioconductor
GenomicRanges	1.40.0	2020-04-27	Bioconductor
ggplot2	3.3.2.9000	2020-08-04	Github (tidyverse/ggplot2@6d91349)
ggrepel	0.9.0	2020-07-24	Github (slowkow/ggrepel@4d0ef50)
GlobalOptions	0.1.2	2020-06-10	CRAN (R 4.0.0)
glue	1.4.1.9000	2020-07-07	Github (tidyverse/glue@205f18b)
gt	0.2.2	2020-08-06	Github (rstudio/gt@c97cb4c)
gtable	0.3.0	2019-03-25	CRAN (R 4.0.0)
gtools	3.8.2	2020-03-31	CRAN (R 4.0.0)
haven	2.3.1	2020-06-01	CRAN (R 4.0.0)
hms	0.5.3	2020-01-08	CRAN (R 4.0.0)
htmltools	0.5.0	2020-06-16	CRAN (R 4.0.1)
httr	1.4.2	2020-07-20	CRAN (R 4.0.2)
IRanges	2.22.2	2020-05-21	Bioconductor
jpeg	0.1-8.1	2019-10-24	CRAN (R 4.0.0)
jsonlite	1.7.0	2020-06-25	CRAN (R 4.0.2)
knitr	1.29	2020-06-23	CRAN (R 4.0.2)
labeling	0.3	2014-08-23	CRAN (R 4.0.0)
lattice	0.20-41	2020-04-02	CRAN (R 4.0.2)
lifecycle	0.2.0	2020-03-06	CRAN (R 4.0.0)
locfit	1.5-9.4	2020-03-25	CRAN (R 4.0.0)
lubridate	1.7.9	2020-07-11	Github (tidyverse/lubridate@de2ee09)
magrittr	1.5.0.9000	2020-08-04	Github (tidyverse/magrittr@ba52096)
Matrix	1.2-18	2019-11-27	CRAN (R 4.0.2)
MatrixModels	0.4-1	2015-08-22	CRAN (R 4.0.0)
matrixStats	0.56.0	2020-03-13	CRAN (R 4.0.0)
memoise	1.1.0	2017-04-21	CRAN (R 4.0.0)
modelr	0.1.8.9000	2020-05-19	Github (tidyverse/modelr@16168e0)
munsell	0.5.0	2018-06-12	CRAN (R 4.0.0)
mvtnorm	1.1-1	2020-06-09	CRAN (R 4.0.2)
patchwork	1.0.1.9000	2020-06-22	Github (thomasp85/patchwork@82a5e03)
pbapply	1.4-2	2019-08-31	CRAN (R 4.0.0)
pillar	1.4.6.9000	2020-07-21	Github (r-lib/pillar@8aef8f2)
pkgbuild	1.1.0.9000	2020-07-14	Github (r-lib/pkgbuild@3a87bd9)
pkgconfig	2.0.3	2019-09-22	CRAN (R 4.0.0)
pkgload	1.1.0	2020-05-29	CRAN (R 4.0.0)
prettyunits	1.1.1	2020-01-24	CRAN (R 4.0.2)
processx	3.4.3	2020-07-05	CRAN (R 4.0.2)
ps	1.3.3	2020-05-08	CRAN (R 4.0.0)
purrr	0.3.4.9000	2020-08-06	Github (tidyverse/purrr@f9d4821)
R6	2.4.1.9000	2020-07-18	Github (r-lib/R6@1415d11)
RColorBrewer	1.1-2	2014-12-07	CRAN (R 4.0.0)
Rcpp	1.0.5	2020-07-06	CRAN (R 4.0.2)
RCurl	1.98-1.2	2020-04-18	CRAN (R 4.0.0)
readr	1.3.1.9000	2020-07-16	Github (tidyverse/readr@2ab51b2)
readxl	1.3.1.9000	2020-05-28	Github (tidyverse/readxl@3815961)
remotes	2.2.0.9000	2020-08-06	Github (r-lib/remotes@5a546ad)
reprex	0.3.0	2019-05-16	CRAN (R 4.0.0)
reticulate	1.16	2020-05-27	CRAN (R 4.0.2)
rlang	0.4.7.9000	2020-07-31	Github (r-lib/rlang@a144ac0)
rmarkdown	2.3.3	2020-07-25	Github (rstudio/rmarkdown@204aa41)
rprojroot	1.3-2	2018-01-03	CRAN (R 4.0.0)
RSQLite	2.2.0	2020-01-07	CRAN (R 4.0.0)
rstudioapi	0.11.0-9000	2020-07-31	Github (rstudio/rstudioapi@aa17630)
Rtsne	0.16	2020-07-03	Github (jkrijthe/Rtsne@14b195f)
Rttf2pt1	1.3.8	2020-01-10	CRAN (R 4.0.0)
rvest	0.3.6	2020-07-25	CRAN (R 4.0.2)
S4Vectors	0.26.1	2020-05-16	Bioconductor
sass	0.2.0	2020-03-18	CRAN (R 4.0.2)
scales	1.1.1.9000	2020-07-24	Github (r-lib/scales@9ff4757)
sessioninfo	1.1.1.9000	2020-07-18	Github (r-lib/sessioninfo@791705b)
shape	1.4.4	2018-02-07	CRAN (R 4.0.0)
stringi	1.4.6	2020-02-17	CRAN (R 4.0.0)
stringr	1.4.0.9000	2020-06-01	Github (tidyverse/stringr@f70c4ba)
styler	1.3.2.9000	2020-07-25	Github (r-lib/styler@16d815e)
SummarizedExperiment	1.18.2	2020-07-09	Bioconductor
survival	3.2-3	2020-06-13	CRAN (R 4.0.2)
testthat	2.99.0.9000	2020-07-28	Github (r-lib/testthat@0af11cd)
tibble	3.0.3.9000	2020-07-21	Github (tidyverse/tibble@b4eec19)
tidyr	1.1.1.9000	2020-07-31	Github (tidyverse/tidyr@61e9209)
tidyselect	1.1.0.9000	2020-07-11	Github (tidyverse/tidyselect@69fdc96)
tidyverse	1.3.0.9000	2020-06-01	Github (hadley/tidyverse@8a0bb99)
usethis	1.6.1.9001	2020-08-06	Github (r-lib/usethis@51fcdb8)
utf8	1.1.4	2018-05-24	CRAN (R 4.0.0)
vctrs	0.3.2.9000	2020-08-04	Github (r-lib/vctrs@1b3cd7c)
viridisLite	0.3.0	2018-02-01	CRAN (R 4.0.0)
withr	2.2.0	2020-04-20	CRAN (R 4.0.0)
xfun	0.16	2020-07-24	CRAN (R 4.0.2)
XML	3.99-0.5	2020-07-23	CRAN (R 4.0.2)
xml2	1.3.2	2020-04-23	CRAN (R 4.0.0)
xtable	1.8-4	2019-04-21	CRAN (R 4.0.0)
XVector	0.28.0	2020-04-27	Bioconductor
yaml	2.2.1	2020-02-01	CRAN (R 4.0.0)
yarrr	0.1.6	2020-07-23	Github (ndphillips/yarrr@e2e4488)
zlibbioc	1.34.0	2020-04-27	Bioconductor

Transcriptomic analysis of trophoblast stem cells derived from naïve human pluripotent stem cells

JD

2020-08-09