Data preparation

SEED_USE <- 20200616
MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE <- 1 # 60

Functions loading

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

Data loading

Prepare metadata for single cells.

# https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE152101
geo_info <- tibble::tribble(
    ~sample_name,         ~sample_description,       ~source_name2, ~source_name3, ~cell_line,
    "GSM4603117",             "BT2 hTSC Rep1",          "BT2 hTSC",        "hTSC",      "BT2",
    "GSM4603118",             "BT2 hTSC Rep2",          "BT2 hTSC",        "hTSC",      "BT2",
    "GSM4603119",             "BT2 hTSC Rep3",          "BT2 hTSC",        "hTSC",      "BT2",
    "GSM4603120",             "BT2 hTSC Rep4",          "BT2 hTSC",        "hTSC",      "BT2",
    "GSM4603121",            "CT1 hTSCs Rep1",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603122",            "CT1 hTSCs Rep2",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603123",            "CT1 hTSCs Rep3",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603124",            "CT1 hTSCs Rep4",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603125",            "CT1 hTSCs Rep5",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603126",            "CT1 hTSCs Rep6",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603127",            "CT1 hTSCs Rep7",          "CT1 hTSC",        "hTSC",      "CT1",
    "GSM4603128",            "CT3 hTSCs Rep1",          "CT3 hTSC",        "hTSC",      "CT3",
    "GSM4603129",            "CT3 hTSCs Rep2",          "CT3 hTSC",        "hTSC",      "CT3",
    "GSM4603130",            "CT3 hTSCs Rep3",          "CT3 hTSC",        "hTSC",      "CT3",
    "GSM4603131",            "CT3 hTSCs Rep4",          "CT3 hTSC",        "hTSC",      "CT3",
    "GSM4603132",   "UCLA1 Primed hESCs Rep1", "UCLA1 Primed hESC", "Primed hESC",    "UCLA1",
    "GSM4603133",   "UCLA1 Primed hESCs Rep2", "UCLA1 Primed hESC", "Primed hESC",    "UCLA1",
    "GSM4603134",   "UCLA1 Primed hESCs Rep3", "UCLA1 Primed hESC", "Primed hESC",    "UCLA1",
    "GSM4603135",  "WIBR3 Primed hESCs Rep 1", "WIBR3 Primed hESC", "Primed hESC",    "WIBR3",
    "GSM4603136",  "WIBR3 Primed hESCs Rep 2", "WIBR3 Primed hESC", "Primed hESC",    "WIBR3",
    "GSM4603137",  "WIBR3 Primed hESCs Rep 3", "WIBR3 Primed hESC", "Primed hESC",    "WIBR3",
    "GSM4603138",  "WIBR3 Primed hESCs Rep 4", "WIBR3 Primed hESC", "Primed hESC",    "WIBR3",
    "GSM4603139",   "WIBR3 Naive hESCs Rep 1",  "WIBR3 Naive hESC", "Primed hESC",    "WIBR3",
    "GSM4603140",   "WIBR3 Naive hESCs Rep 2",  "WIBR3 Naive hESC", "Primed hESC",    "WIBR3",
    "GSM4603141", "WIBR3 tdhTSC Line 1 Rep 1",      "WIBR3 tdhTSC",      "tdhTSC",    "WIBR3",
    "GSM4603142", "WIBR3 tdhTSC Line 1 Rep 2",      "WIBR3 tdhTSC",      "tdhTSC",    "WIBR3",
    "GSM4603143",  "WIBR3 tdhTSC Line 2 Rep1",      "WIBR3 tdhTSC",      "tdhTSC",    "WIBR3",
    "GSM4603144", "WIBR3 tdhTSC Line 2 Rep 2",      "WIBR3 tdhTSC",      "tdhTSC",    "WIBR3",
    "GSM4603145", "WIBR3 tdhTSC Line 3 Rep 1",      "WIBR3 tdhTSC",      "tdhTSC",    "WIBR3",
    "GSM4603146", "UCLA1 tdhTSC Line 1 Rep 1",      "UCLA1 tdhTSC",      "tdhTSC",    "UCLA1",
    "GSM4603147", "UCLA1 tdhTSC Line 1 Rep 2",      "UCLA1 tdhTSC",      "tdhTSC",    "UCLA1",
    "GSM4603148",                     "FT190",             "FT190",       "FT190",    "FT190",
    "GSM4603149",                    "Hec116",            "Hec116",      "Hec116",   "Hec116"
)

cell_metadata <- read_delim(
    file = "../SraRunTable.txt",
    delim = ","
) %>%
    rename_all(. %>% tolower()) %>%
    `colnames<-`(str_replace(
        string = colnames(.),
        pattern = " ", replacement = "_"
    )) %>%
    select(
        sample_name,
        run,
        biosample,
        source_name,
        cell_type
    ) %>%
    left_join(geo_info, by = "sample_name") %>%
    mutate(
        source_name3 = factor(
            source_name3,
            levels = c("Primed hESC", "tdhTSC", "hTSC", "FT190", "Hec116")
        ),
        source_name2 = factor(
            source_name2,
            levels = c(
                "WIBR3 Naive hESC",
                "WIBR3 Primed hESC",
                "UCLA1 Primed hESC",
                "WIBR3 tdhTSC",
                "UCLA1 tdhTSC",
                "CT1 hTSC",
                "CT3 hTSC",
                "BT2 hTSC",
                "FT190",
                "Hec116"
            )
        )
    )

## Parsed with column specification:
## cols(
##   .default = col_character(),
##   AvgSpotLen = col_double(),
##   Bases = col_double(),
##   Bytes = col_double(),
##   ReleaseDate = col_datetime(format = "")
## )

## See spec(...) for full column specifications.

cell_metadata

Load featureCounts output.

matrix_readcount_use <- read_delim(
    file = "../matrix/xaa_Aligned.sortedByCoord.out_deduped_q10_gene_id_featureCounts.txt.gz",
    delim = "\t",
    col_names = TRUE,
    skip = 1
) %>%
    select(-(2:6))

## Parsed with column specification:
## cols(
##   .default = col_double(),
##   Geneid = col_character(),
##   Chr = col_character(),
##   Start = col_character(),
##   End = col_character(),
##   Strand = col_character()
## )

## See spec(...) for full column specifications.

colnames(matrix_readcount_use) <- colnames(matrix_readcount_use) %>%
    str_remove(
        pattern = "_rnaseq/aln/Aligned.sortedByCoord.out_deduped.+.bam"
    ) %>%
    str_remove(pattern = "../aln/")

matrix_readcount_use <- Matrix(
    data = as.matrix(matrix_readcount_use[, -1]),
    dimnames = list(
        matrix_readcount_use$Geneid,
        colnames(matrix_readcount_use)[-1]
    ),
    sparse = TRUE
)

matrix_readcount_use <- matrix_readcount_use[, colnames(matrix_readcount_use)]
matrix_readcount_use %>%
    dim()

## [1] 33538    33

stopifnot(
    rownames(matrix_readcount_use) == gene_symbo_info$X1
)
rownames(matrix_readcount_use) <- paste(
    rownames(matrix_readcount_use),
    gene_symbo_info$X2,
    sep = "_"
)

colnames(matrix_readcount_use) <- colnames(matrix_readcount_use) %>%
    enframe(value = "run") %>%
    left_join(cell_metadata, by = c("run" = "run")) %>%
    pull("sample_name")


matrix_cpm_use <- calc_cpm(m = matrix_readcount_use)

Preprocessing

Summarize sequencing statistics.

cell_metadata %>%
    mutate(
        num_umis = matrix_readcount_use[, .$sample_name] %>% colSums(),
        num_genes = (matrix_readcount_use[, .$sample_name] > 0) %>% colSums()
    ) %>%
    group_by(source_name) %>%
    summarise(
        num_cells = n(),
        num_umis = median(num_umis),
        num_genes = median(num_genes)
    ) %>%
    arrange(source_name) %>%
    as.data.frame() %>%
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

## `summarise()` ungrouping output (override with `.groups` argument)

source_name	num_cells	num_umis	num_genes
BT2 hTSC	4	16801613	19248.5
CT1 hTSCs	7	22494057	19757.0
CT3 hTSCs	4	12846493	18481.5
FT190	1	27145833	19662.0
Hec116	1	31181575	19418.0
UCLA1 Primed hESCs	3	28791188	20303.0
UCLA1 tdhTSC Line 1	2	6277919	18684.5
WIBR3 Naive hESCs	2	16976784	22432.0
WIBR3 Primed hESCs	4	20537352	22395.0
WIBR3 tdhTSC Line 1	2	7416691	19475.5
WIBR3 tdhTSC Line 2	2	18797339	20440.0
WIBR3 tdhTSC Line 3	1	8134445	19006.0

Filter uninformative genes.

matrix_readcount_norm <- matrix_readcount_use
matrix_readcount_norm <- matrix_readcount_norm[
    Matrix::rowSums(
        matrix_readcount_norm
    ) >= MINIMAL_NUM_COUNTS_REQUIRED_FOR_GENE,
]
dim(matrix_readcount_norm)

## [1] 29620    33

Median normalize matrix.

matrix_readcount_norm@x <- median(colSums(matrix_readcount_norm)) *
    (matrix_readcount_norm@x / rep.int(
        colSums(matrix_readcount_norm),
        diff(matrix_readcount_norm@p)
    ))

Dimensionality reduction

PCA

matrix_readcount_norm_log <- matrix_readcount_norm
matrix_readcount_norm_log@x <- log1p(matrix_readcount_norm_log@x)

# z-score
matrix_readcount_norm_log_scaled <- t(
    scale(
        t(
            matrix_readcount_norm_log
        ),
        center = TRUE, scale = TRUE
    )
)
# features_var <- apply(matrix_readcount_norm_log_scaled, 1, var)
# features_use <- rownames(matrix_readcount_norm_log_scaled)[features_var > 0]

pca_out <- prcomp(
    t(matrix_readcount_norm_log_scaled),
    center = FALSE,
    scale = FALSE
)

summary(pca_out)$imp %>%
    t() %>%
    as.data.frame() %>%
    rownames_to_column(var = "component") %>%
    mutate(
        rank = 1:n()
    ) %>%
    # slice(1:33) %>%
    ggplot(
        aes(
            x = rank,
            y = `Proportion of Variance`
        )
    ) +
    geom_point(size = 0.3) +
    theme_bw() +
    scale_x_continuous(
        name = "Component",
        breaks = c(1, seq(5, 30, 5))
    ) +
    scale_y_continuous(
        name = "Proportion of variance",
        labels = scales::percent
    ) +
    theme(
        axis.title = ggplot2::element_text(family = "Arial", size = 6),
        axis.text = ggplot2::element_text(family = "Arial", size = 6),
        panel.grid.minor = ggplot2::element_blank()
    )

file_name <- "Rplot_pca_variance_explained.pdf"
if (!file.exists(file_name)) {
    ggsave(
        filename = file_name,
        useDingbats = FALSE,
        plot = last_plot(),
        device = NULL,
        path = NULL,
        scale = 1,
        width = 55 * 1.5,
        height = 55,
        units = c("mm"),
    )
}

embedding <- pca_out$x %>%
    as.data.frame() %>%
    rownames_to_column(var = "sample_name") %>%
    left_join(cell_metadata, by = "sample_name") %>%
    select(
        sample_name,
        run,
        biosample,
        source_name,
        source_name2,
        source_name3,
        cell_line,
        cell_type,
        PC1,
        PC2,
        PC3
    ) %>%
    dplyr::rename(
        x_pca = PC1,
        y_pca = PC2,
        z_pca = PC3
    )

p_embedding_pca1 <- embedding %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = y_pca,
        color = source_name3,
        shape = cell_line
    ) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 10")(
            length(unique(embedding$source_name3))
        )
    ) +
    scale_shape_manual(values = seq(length(unique(embedding$cell_line)))) +
    add_xy_label_pca()

p_embedding_pca2 <- embedding %>%
    mutate(
        category = "PCA"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_pca,
        y = z_pca,
        color = source_name3,
        shape = cell_line
    ) +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 10")(
            length(unique(embedding$source_name3))
        )
    ) +
    scale_shape_manual(values = seq(length(unique(embedding$cell_line)))) +
    add_xy_label_pca(y = "PC3")

p_embedding_pca1 +
    p_embedding_pca2 +
    plot_annotation(theme = theme(plot.margin = margin())) +
    plot_layout(guides = "collect")

file_name <- "Rplot_embedding_pca.pdf"
if (!file.exists(file_name)) {
    ggsave(
        filename = file_name,
        useDingbats = FALSE,
        plot = last_plot(),
        device = NULL,
        path = NULL,
        scale = 1,
        width = 55 * 2.5,
        height = 55,
        units = c("mm"),
    )
}

t-SNE

N_COMPONENTS <- 15

set.seed(seed = SEED_USE)
embedding_rtsne <- Rtsne::Rtsne(
    X = pca_out$x[, 1:N_COMPONENTS],
    perplexity = 5,
    check_duplicates = FALSE,
    pca = FALSE,
    max_iter = 3000,
    verbose = TRUE
)$Y
## Read the 33 x 15 data matrix successfully!
## Using no_dims = 2, perplexity = 5.000000, and theta = 0.500000
## Computing input similarities...
## Building tree...
## Done in 0.00 seconds (sparsity = 0.631772)!
## Learning embedding...
## Iteration 50: error is 64.222172 (50 iterations in 0.00 seconds)
## Iteration 100: error is 63.810775 (50 iterations in 0.00 seconds)
## Iteration 150: error is 65.054511 (50 iterations in 0.00 seconds)
## Iteration 200: error is 60.301765 (50 iterations in 0.00 seconds)
## Iteration 250: error is 59.448383 (50 iterations in 0.00 seconds)
## Iteration 300: error is 1.851475 (50 iterations in 0.00 seconds)
## Iteration 350: error is 1.244501 (50 iterations in 0.00 seconds)
## Iteration 400: error is 0.733360 (50 iterations in 0.00 seconds)
## Iteration 450: error is 0.417021 (50 iterations in 0.00 seconds)
## Iteration 500: error is 0.704775 (50 iterations in 0.00 seconds)
## Iteration 550: error is 0.518082 (50 iterations in 0.00 seconds)
## Iteration 600: error is 0.666820 (50 iterations in 0.00 seconds)
## Iteration 650: error is 0.586612 (50 iterations in 0.00 seconds)
## Iteration 700: error is 0.460416 (50 iterations in 0.00 seconds)
## Iteration 750: error is 0.261434 (50 iterations in 0.00 seconds)
## Iteration 800: error is 0.237304 (50 iterations in 0.00 seconds)
## Iteration 850: error is 0.238038 (50 iterations in 0.00 seconds)
## Iteration 900: error is 0.234468 (50 iterations in 0.00 seconds)
## Iteration 950: error is 0.233803 (50 iterations in 0.00 seconds)
## Iteration 1000: error is 0.238547 (50 iterations in 0.00 seconds)
## Iteration 1050: error is 0.239816 (50 iterations in 0.00 seconds)
## Iteration 1100: error is 0.232605 (50 iterations in 0.00 seconds)
## Iteration 1150: error is 0.233348 (50 iterations in 0.00 seconds)
## Iteration 1200: error is 0.232086 (50 iterations in 0.00 seconds)
## Iteration 1250: error is 0.230282 (50 iterations in 0.00 seconds)
## Iteration 1300: error is 0.232419 (50 iterations in 0.00 seconds)
## Iteration 1350: error is 0.238228 (50 iterations in 0.00 seconds)
## Iteration 1400: error is 0.229377 (50 iterations in 0.00 seconds)
## Iteration 1450: error is 0.224402 (50 iterations in 0.00 seconds)
## Iteration 1500: error is 0.232671 (50 iterations in 0.00 seconds)
## Iteration 1550: error is 0.228866 (50 iterations in 0.00 seconds)
## Iteration 1600: error is 0.235634 (50 iterations in 0.00 seconds)
## Iteration 1650: error is 0.243834 (50 iterations in 0.00 seconds)
## Iteration 1700: error is 0.237870 (50 iterations in 0.00 seconds)
## Iteration 1750: error is 0.237896 (50 iterations in 0.00 seconds)
## Iteration 1800: error is 0.233248 (50 iterations in 0.00 seconds)
## Iteration 1850: error is 0.230783 (50 iterations in 0.00 seconds)
## Iteration 1900: error is 0.251373 (50 iterations in 0.00 seconds)
## Iteration 1950: error is 0.233596 (50 iterations in 0.00 seconds)
## Iteration 2000: error is 0.237776 (50 iterations in 0.00 seconds)
## Iteration 2050: error is 0.232010 (50 iterations in 0.00 seconds)
## Iteration 2100: error is 0.236186 (50 iterations in 0.00 seconds)
## Iteration 2150: error is 0.232908 (50 iterations in 0.00 seconds)
## Iteration 2200: error is 0.231723 (50 iterations in 0.00 seconds)
## Iteration 2250: error is 0.233285 (50 iterations in 0.00 seconds)
## Iteration 2300: error is 0.231491 (50 iterations in 0.00 seconds)
## Iteration 2350: error is 0.227211 (50 iterations in 0.00 seconds)
## Iteration 2400: error is 0.232979 (50 iterations in 0.00 seconds)
## Iteration 2450: error is 0.233152 (50 iterations in 0.00 seconds)
## Iteration 2500: error is 0.233657 (50 iterations in 0.00 seconds)
## Iteration 2550: error is 0.232566 (50 iterations in 0.00 seconds)
## Iteration 2600: error is 0.232617 (50 iterations in 0.00 seconds)
## Iteration 2650: error is 0.234571 (50 iterations in 0.00 seconds)
## Iteration 2700: error is 0.230194 (50 iterations in 0.00 seconds)
## Iteration 2750: error is 0.233461 (50 iterations in 0.00 seconds)
## Iteration 2800: error is 0.231259 (50 iterations in 0.00 seconds)
## Iteration 2850: error is 0.234836 (50 iterations in 0.00 seconds)
## Iteration 2900: error is 0.233992 (50 iterations in 0.00 seconds)
## Iteration 2950: error is 0.230162 (50 iterations in 0.00 seconds)
## Iteration 3000: error is 0.231858 (50 iterations in 0.00 seconds)
## Fitting performed in 0.09 seconds.

embedding <- cbind(
    embedding,
    embedding_rtsne
) %>%
    dplyr::rename(
        x_tsne = "1",
        y_tsne = "2"
    )

p_embedding_tsne <- embedding %>%
    mutate(
        category = "t-SNE"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_tsne,
        y = y_tsne,
        color = source_name3,
        shape = cell_line
    ) +
    labs(x = "Dim 1", y = "Dim 2") +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 10")(
            length(unique(embedding$source_name3))
        )
    ) +
    scale_shape_manual(values = seq(length(unique(embedding$cell_line))))

embedding %>%
    dplyr::count(source_name3) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

source_name3	num_cells
Primed hESC	9
tdhTSC	7
hTSC	15
FT190	1
Hec116	1

UMAP

N_COMPONENTS <- 15

set.seed(seed = SEED_USE)
embedding_umap <- uwot::umap(
    X = pca_out$x[, 1:N_COMPONENTS],
    n_neighbors = 15,
    n_components = 2,
    metric = "cosine",
    spread = 1,
    min_dist = 0.01,
    verbose = TRUE
)
## 22:00:08 UMAP embedding parameters a = 1.896 b = 0.8006
## 22:00:08 Read 33 rows and found 15 numeric columns
## 22:00:08 Using Annoy for neighbor search, n_neighbors = 15
## 22:00:08 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 22:00:08 Writing NN index file to temp file /var/folders/h1/78b2tkd552ngjl6_ps_5sw7r0000gn/T//RtmphxQO4k/file4db8500595ea
## 22:00:08 Searching Annoy index using 4 threads, search_k = 1500
## 22:00:08 Annoy recall = 100%
## 22:00:09 Commencing smooth kNN distance calibration using 4 threads
## 22:00:09 Initializing from normalized Laplacian + noise
## 22:00:09 Commencing optimization for 500 epochs, with 508 positive edges
## 22:00:09 Optimization finished

embedding <- cbind(
    embedding,
    embedding_umap
) %>%
    dplyr::rename(
        x_umap = "1",
        y_umap = "2"
    )

p_embedding_umap <- embedding %>%
    mutate(
        category = "UMAP"
    ) %>%
    sample_frac() %>%
    plot_pca(
        x = x_umap,
        y = y_umap,
        color = source_name3,
        shape = cell_line
    ) +
    labs(x = "Dim 1", y = "Dim 2") +
    scale_color_manual(
        values = ggthemes::tableau_color_pal("Tableau 10")(
            length(unique(embedding$source_name3))
        )
    ) +
    scale_shape_manual(values = seq(length(unique(embedding$cell_line))))

Fig. 3C

p_embedding_tsne +
    p_embedding_umap +
    plot_annotation(theme = theme(plot.margin = margin())) +
    plot_layout(nrow = 1, guides = "collect")

file_name <- "Rplot_embedding.pdf"
if (!file.exists(file_name)) {
    ggsave(
        filename = file_name,
        useDingbats = FALSE,
        plot = last_plot(),
        device = NULL,
        path = NULL,
        scale = 1,
        width = 55 * 2.5,
        height = 55,
        units = c("mm"),
    )
}

Graphical Abstract

p_embedding_pca1 +
    add_point_labels(
        x = x_pca,
        y = y_pca,
        z = source_name2
    ) +
    p_embedding_tsne +
    add_point_labels(
        x = x_tsne,
        y = y_tsne,
        z = source_name2
    ) +
    p_embedding_umap +
    add_point_labels(
        x = x_umap,
        y = y_umap,
        z = source_name2
    ) +
    plot_annotation(theme = theme(plot.margin = margin())) +
    plot_layout(ncol = 1, guides = "collect")

Expression

embedding %>%
    dplyr::count(source_name2) %>%
    gt::gt() %>%
    gt::cols_label(n = "num_cells") %>%
    gt::tab_options(table.font.size = "median")

source_name2	num_cells
WIBR3 Naive hESC	2
WIBR3 Primed hESC	4
UCLA1 Primed hESC	3
WIBR3 tdhTSC	5
UCLA1 tdhTSC	2
CT1 hTSC	7
CT3 hTSC	4
BT2 hTSC	4
FT190	1
Hec116	1

FEATUES_SELECTED <- c(
    "ENSG00000204531_POU5F1",
    "ENSG00000111704_NANOG",
    "ENSG00000181449_SOX2",
    #
    "ENSG00000179348_GATA2",
    "ENSG00000107485_GATA3",
    "ENSG00000087510_TFAP2C",
    "ENSG00000102243_VGLL1",
    "ENSG00000204632_HLA-G",
    "ENSG00000189052_CGB5",
    "ENSG00000137270_GCM1",
    "ENSG00000242950_ERVW-1",
    "ENSG00000113249_HAVCR1",
    "ENSG00000131503_ANKHD1",
    "ENSG00000134757_DSG3",
    #
    "ENSG00000115221_ITGB6",
    "ENSG00000016082_ISL1",
    "ENSG00000075223_SEMA3C",
    "ENSG00000182968_SOX1",
    "ENSG00000164458_TBXT",
    "ENSG00000136574_GATA4",
    #
    "ENSG00000073282_TP63",
    "ENSG00000135374_ELF5",
    "ENSG00000197905_TEAD4"
)

matrix_readcount_meta <- map(levels(embedding$source_name2), function(x) {
    sample_in_group <- embedding %>%
        filter(source_name2 == x) %>%
        pull(sample_name)

    if (length(sample_in_group) > 1) {
        a <- matrix_readcount_use[, sample_in_group] %>% rowSums()
    } else {
        a <- matrix_readcount_use[, sample_in_group]
    }

    return(a)
}) %>%
    bind_cols() %>%
    `colnames<-`(levels(embedding$source_name2)) %>%
    as.matrix()

## New names:
## * NA -> ...1
## * NA -> ...2
## * NA -> ...3
## * NA -> ...4
## * NA -> ...5
## * ...

rownames(matrix_readcount_meta) <- rownames(matrix_readcount_use)

Fig. 3A

calc_cpm(matrix_readcount_meta)[FEATUES_SELECTED, ] %>%
    as.matrix() %>%
    as.data.frame() %>%
    rownames_to_column(var = "feature") %>%
    pivot_longer(
        -c("feature"),
        names_to = "sample"
    ) %>%
    mutate(
        feature = factor(
            feature,
            levels = FEATUES_SELECTED %>% rev(.)
        ),
        sample = factor(
            sample,
            levels = colnames(matrix_readcount_meta)
        )
    ) %>%
    plot_heatmap(
        x = sample,
        y = feature,
        z = log10(value + 1),
        y_order = rev(FEATUES_SELECTED)
    )

hTSC vs primed hESC

hTSC (CT1, CT3, and BT2) versus primed (WIBR3 and UCLA1)

library("DESeq2")
## Loading required package: S4Vectors
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following object is masked from 'package:Matrix':
## 
##     which
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which, which.max, which.min
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:Matrix':
## 
##     expand
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following object is masked from 'package:tidyr':
## 
##     expand
## The following object is masked from 'package:base':
## 
##     expand.grid
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## The following object is masked from 'package:purrr':
## 
##     reduce
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## Attaching package: 'Biobase'
## The following object is masked from 'package:rlang':
## 
##     exprs
## Loading required package: DelayedArray
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'DelayedArray'
## The following objects are masked from 'package:matrixStats':
## 
##     colMaxs, colMins, colRanges, rowMaxs, rowMins, rowRanges
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following objects are masked from 'package:base':
## 
##     aperm, apply, rowsum
library("BiocParallel")

samples_hTSC <- embedding %>%
    filter(str_detect(string = source_name2, pattern = "\\shTSC$")) %>%
    pull(sample_name)

samples_primed_hESC <- embedding %>%
    filter(str_detect(string = source_name2, pattern = "\\sPrimed hESC$")) %>%
    pull(sample_name)

# Primed hPSC in hTSC media: GSM4116149, GSM4116150
# naïve hTSC: GSM4116158, GSM4116159, GSM4116160, GSM4276363, GSM4276364

cts <- matrix_readcount_use[, c(samples_hTSC, samples_primed_hESC)]
col_data <- data.frame(
    sample = c(
        rep("hTSC", times = length(samples_hTSC)),
        rep("Primed hESC", times = length(samples_primed_hESC))
    ),
    row.names = c(samples_hTSC, samples_primed_hESC)
) %>%
    mutate(
        sample = factor(
            sample,
            levels = c("Primed hESC", "hTSC")
        )
    )

dds <- DESeqDataSetFromMatrix(
    countData = cts,
    colData = col_data,
    design = ~sample
)
## converting counts to integer mode
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

dds <- DESeq(dds)
## estimating size factors
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## final dispersion estimates
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## -- replacing outliers and refitting for 181 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]
## fitting model and testing
##   Note: levels of factors in the design contain characters other than
##   letters, numbers, '_' and '.'. It is recommended (but not required) to use
##   only letters, numbers, and delimiters '_' or '.', as these are safe characters
##   for column names in R. [This is a message, not a warning or an error]

deseq2_out <- results(dds)
# mcols(deseq2_out)
deseq2_out
## log2 fold change (MLE): sample hTSC vs Primed.hESC 
## Wald test p-value: sample hTSC vs Primed.hESC 
## DataFrame with 33538 rows and 6 columns
##                               baseMean log2FoldChange     lfcSE      stat
##                              <numeric>      <numeric> <numeric> <numeric>
## ENSG00000243485_MIR1302-2HG  0.0798401      -0.602397   3.23216 -0.186376
## ENSG00000237613_FAM138A      0.1387252       0.565111   3.23363  0.174761
## ENSG00000186092_OR4F5        0.9202942       0.906621   1.02055  0.888362
## ENSG00000238009_AL627309.1   1.2747570       0.485332   1.00998  0.480538
## ENSG00000239945_AL627309.3   0.0000000             NA        NA        NA
## ...                                ...            ...       ...       ...
## ENSG00000277856_AC233755.2   0.0655942       1.082014  3.233171  0.334660
## ENSG00000275063_AC233755.1   0.0000000             NA        NA        NA
## ENSG00000271254_AC240274.1  60.9560234      -0.155132  0.378897 -0.409431
## ENSG00000277475_AC213203.1   2.4806493      -3.757300  0.931362 -4.034199
## ENSG00000268674_FAM231C      0.0000000             NA        NA        NA
##                                  pvalue        padj
##                               <numeric>   <numeric>
## ENSG00000243485_MIR1302-2HG    0.852150          NA
## ENSG00000237613_FAM138A        0.861268          NA
## ENSG00000186092_OR4F5          0.374346    0.459478
## ENSG00000238009_AL627309.1     0.630845    0.700969
## ENSG00000239945_AL627309.3           NA          NA
## ...                                 ...         ...
## ENSG00000277856_AC233755.2  7.37881e-01          NA
## ENSG00000275063_AC233755.1           NA          NA
## ENSG00000271254_AC240274.1  6.82223e-01 0.745439496
## ENSG00000277475_AC213203.1  5.47889e-05 0.000158789
## ENSG00000268674_FAM231C              NA          NA

summary(deseq2_out)

## 
## out of 28682 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 5837, 20%
## LFC < 0 (down)     : 8657, 30%
## outliers [1]       : 0, 0%
## low counts [2]     : 5515, 19%
## (mean count < 0)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

table_s6_sheet1 <- readxl::read_excel(
    path = "../../../../docs/Naive_Human_Embryonic_Stem_Cells_Can_Give_Rise_to_Cells_with_a_Trophoblast-like_Transcriptome_and_Methylome/mmc6.xlsx",
    sheet = "Sheet1"
)

table_s6_sheet1

Fig. 6B
Genes marked are from Table S6

# grep("pasd1", rownames(matrix_readcount_use), ignore.case = TRUE, value = TRUE)

FEATURES_TO_LABEL <- paste(
    table_s6_sheet1$Gene,
    table_s6_sheet1$Symbol,
    sep = "_"
)

FEATURES_TO_LABEL <- intersect(
    FEATURES_TO_LABEL,
    rownames(matrix_readcount_use)
)

deseq2_out %>%
    as.data.frame() %>%
    mutate(
        group = case_when(
            log2FoldChange >= 2 & padj <= 0.05 ~ "High in hTSC",
            log2FoldChange <= -2 & padj <= 0.05 ~ "High in primed hESC",
            TRUE ~ "Not significant"
        )
    ) %>%
    plot_volcano(
        x = log2FoldChange,
        y = -log10(padj),
        z = group, xlim = c(-12.5, 12.5)
    ) +
    geom_text_repel(
        data = as.data.frame(deseq2_out)[FEATURES_TO_LABEL, ] %>%
            mutate(
                symbol = rownames(.),
                symbol = str_remove(string = symbol, pattern = "^E.+_")
            ),
        aes(log2FoldChange,
            -log10(padj),
            label = symbol
        ),
        box.padding = 0.5,
        size = 1.8,
        family = "Arial",
        color = "black",
        max.iter = 10000,
        max.overlaps = Inf,
        nudge_x = .15,
        nudge_y = 1,
        segment.curvature = -0.1,
        segment.ncp = 3,
        segment.angle = 20,
    )

## Warning: Removed 10367 rows containing missing values (geom_point).

hTSC vs tdhTSC

hTSC (CT1, CT3, and BT2) versus tdhTSC (WIBR3 tdhTSC lines 1, 2, and 3, and UCLA1 line 1)

samples_tdhTSC <- embedding %>%
    filter(str_detect(string = source_name2, pattern = "\\stdhTSC$")) %>%
    pull(sample_name)

# Primed hPSC in hTSC media: GSM4116149, GSM4116150
# naïve hTSC: GSM4116158, GSM4116159, GSM4116160, GSM4276363, GSM4276364

cts <- matrix_readcount_use[, c(samples_hTSC, samples_tdhTSC)]
col_data <- data.frame(
    sample = c(
        rep("hTSC", times = length(samples_hTSC)),
        rep("tdhTSC", times = length(samples_tdhTSC))
    ),
    row.names = c(samples_hTSC, samples_tdhTSC)
) %>%
    mutate(
        sample = factor(
            sample,
            levels = c("tdhTSC", "hTSC")
        )
    )

dds <- DESeqDataSetFromMatrix(
    countData = cts,
    colData = col_data,
    design = ~sample
)
## converting counts to integer mode

dds <- DESeq(dds)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 44 genes
## -- DESeq argument 'minReplicatesForReplace' = 7 
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing

deseq2_out <- results(dds)
# mcols(deseq2_out)
deseq2_out
## log2 fold change (MLE): sample hTSC vs tdhTSC 
## Wald test p-value: sample hTSC vs tdhTSC 
## DataFrame with 33538 rows and 6 columns
##                               baseMean log2FoldChange     lfcSE         stat
##                              <numeric>      <numeric> <numeric>    <numeric>
## ENSG00000243485_MIR1302-2HG  0.0000000             NA        NA           NA
## ENSG00000237613_FAM138A      0.0865244      -0.104330   3.23311   -0.0322691
## ENSG00000186092_OR4F5        0.8575957       0.178602   1.04037    0.1716719
## ENSG00000238009_AL627309.1   0.9088490       0.653415   1.29504    0.5045498
## ENSG00000239945_AL627309.3   0.0000000             NA        NA           NA
## ...                                ...            ...       ...          ...
## ENSG00000277856_AC233755.2   0.0526722    0.000918529  3.232669  0.000284139
## ENSG00000275063_AC233755.1   0.0000000             NA        NA           NA
## ENSG00000271254_AC240274.1  57.1957050   -0.755355136  0.377955 -1.998531428
## ENSG00000277475_AC213203.1   0.2565203    0.462397405  2.923221  0.158180804
## ENSG00000268674_FAM231C      0.0000000             NA        NA           NA
##                                pvalue      padj
##                             <numeric> <numeric>
## ENSG00000243485_MIR1302-2HG        NA        NA
## ENSG00000237613_FAM138A      0.974257        NA
## ENSG00000186092_OR4F5        0.863695        NA
## ENSG00000238009_AL627309.1   0.613875        NA
## ENSG00000239945_AL627309.3         NA        NA
## ...                               ...       ...
## ENSG00000277856_AC233755.2  0.9997733        NA
## ENSG00000275063_AC233755.1         NA        NA
## ENSG00000271254_AC240274.1  0.0456591  0.160629
## ENSG00000277475_AC213203.1  0.8743143        NA
## ENSG00000268674_FAM231C            NA        NA

summary(deseq2_out)

## 
## out of 27676 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 2047, 7.4%
## LFC < 0 (down)     : 1931, 7%
## outliers [1]       : 0, 0%
## low counts [2]     : 9021, 33%
## (mean count < 1)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

Fig. 6C

FEATURES_TO_LABEL <- c(
    "ENSG00000109576_AADAT",
    "ENSG00000121413_ZSCAN18",
    "ENSG00000048545_GUCA1A",
    "ENSG00000120256_LRP11",
    "ENSG00000112837_TBX18",
    #
    "ENSG00000066827_ZFAT",
    "ENSG00000198300_PEG3",
    "ENSG00000270816_LINC00221",
    "ENSG00000284701_TMEM247",
    "ENSG00000147381_MAGEA4",
    "ENSG00000225778_PROSER2-AS1",
    "ENSG00000166049_PASD1"
)

deseq2_out %>%
    as.data.frame() %>%
    mutate(
        group = case_when(
            log2FoldChange >= 2 & padj <= 0.05 ~ "High in hTSC",
            log2FoldChange <= -2 & padj <= 0.05 ~ "High in tdhTSC",
            TRUE ~ "Not significant"
        )
    ) %>%
    plot_volcano(
        x = log2FoldChange,
        y = -log10(padj),
        z = group, xlim = c(-10, 10)
    ) +
    geom_text_repel(
        data = as.data.frame(deseq2_out)[FEATURES_TO_LABEL, ] %>%
            mutate(
                symbol = rownames(.),
                symbol = str_remove(string = symbol, pattern = "^E.+_")
            ),
        aes(log2FoldChange,
            -log10(padj),
            label = symbol
        ),
        size = 1.8,
        family = "Arial",
        color = "black",
        nudge_x = .15,
        box.padding = 0.5,
        nudge_y = 1,
        segment.curvature = -0.1,
        segment.ncp = 3,
        segment.angle = 20
    )

## Warning: Removed 14882 rows containing missing values (geom_point).

if (FALSE) {
    # check
    SELECTED_FEATURE <- "ENSG00000066827_ZFAT"
    matrix_cpm_use[SELECTED_FEATURE, c(samples_hTSC, samples_tdhTSC)] %>%
        enframe(name = "sample", value = "CPM") %>%
        print(n = nrow(.))
}

R session info

devtools::session_info()$platform

##  setting  value                       
##  version  R version 4.0.2 (2020-06-22)
##  os       macOS Catalina 10.15.6      
##  system   x86_64, darwin19.5.0        
##  ui       unknown                     
##  language (EN)                        
##  collate  en_US.UTF-8                 
##  ctype    en_US.UTF-8                 
##  tz       America/Chicago             
##  date     2020-08-09

devtools::session_info()$pack %>%
    as_tibble() %>%
    select(
        package,
        loadedversion,
        date,
        `source`
    ) %>%
    # print(n = nrow(.))
    gt::gt() %>%
    gt::tab_options(table.font.size = "median")

package	loadedversion	date	source
annotate	1.66.0	2020-04-27	Bioconductor
AnnotationDbi	1.50.3	2020-07-25	Bioconductor
assertthat	0.2.1	2019-03-21	CRAN (R 4.0.0)
backports	1.1.8	2020-06-17	CRAN (R 4.0.1)
Biobase	2.48.0	2020-04-27	Bioconductor
BiocGenerics	0.34.0	2020-04-27	Bioconductor
BiocParallel	1.22.0	2020-04-27	Bioconductor
bit	4.0.4	2020-08-04	CRAN (R 4.0.2)
bit64	4.0.2	2020-07-30	CRAN (R 4.0.2)
bitops	1.0-6	2013-08-17	CRAN (R 4.0.0)
blob	1.2.1	2020-01-20	CRAN (R 4.0.0)
broom	0.7.0.9001	2020-08-09	Github (tidymodels/broom@a0bb105)
callr	3.4.3.9000	2020-07-31	Github (r-lib/callr@b96da8f)
cellranger	1.1.0	2016-07-27	CRAN (R 4.0.0)
checkmate	2.0.0	2020-02-06	CRAN (R 4.0.0)
cli	2.0.2	2020-02-28	CRAN (R 4.0.0)
codetools	0.2-16	2018-12-24	CRAN (R 4.0.2)
colorspace	1.4-1	2019-03-18	CRAN (R 4.0.0)
crayon	1.3.4	2017-09-16	CRAN (R 4.0.0)
DBI	1.1.0	2019-12-15	CRAN (R 4.0.0)
dbplyr	1.4.4.9000	2020-07-28	Github (tidyverse/dbplyr@a6ed629)
DelayedArray	0.14.1	2020-07-14	Bioconductor
desc	1.2.0	2018-05-01	CRAN (R 4.0.0)
DESeq2	1.28.1	2020-05-12	Bioconductor
devtools	2.3.1.9000	2020-07-31	Github (r-lib/devtools@df619ce)
digest	0.6.25	2020-02-23	CRAN (R 4.0.0)
dplyr	1.0.1.9001	2020-08-03	Github (tidyverse/dplyr@dc3e0e6)
ellipsis	0.3.1.9000	2020-07-18	Github (r-lib/ellipsis@57a5071)
evaluate	0.14	2019-05-28	CRAN (R 4.0.0)
extrafont	0.17	2014-12-08	CRAN (R 4.0.2)
extrafontdb	1.0	2012-06-11	CRAN (R 4.0.0)
fansi	0.4.1	2020-01-08	CRAN (R 4.0.0)
farver	2.0.3	2020-01-16	CRAN (R 4.0.0)
forcats	0.5.0.9000	2020-05-28	Github (tidyverse/forcats@ab81d1b)
fs	1.5.0.9000	2020-08-03	Github (r-lib/fs@93e70a9)
genefilter	1.70.0	2020-04-27	Bioconductor
geneplotter	1.66.0	2020-04-27	Bioconductor
generics	0.0.2	2018-11-29	CRAN (R 4.0.0)
GenomeInfoDb	1.24.2	2020-06-15	Bioconductor
GenomeInfoDbData	1.2.3	2020-04-24	Bioconductor
GenomicRanges	1.40.0	2020-04-27	Bioconductor
ggplot2	3.3.2.9000	2020-08-04	Github (tidyverse/ggplot2@6d91349)
ggrepel	0.9.0	2020-07-24	Github (slowkow/ggrepel@4d0ef50)
ggthemes	4.2.0	2019-05-13	CRAN (R 4.0.0)
glue	1.4.1.9000	2020-07-07	Github (tidyverse/glue@205f18b)
gt	0.2.2	2020-08-06	Github (rstudio/gt@c97cb4c)
gtable	0.3.0	2019-03-25	CRAN (R 4.0.0)
haven	2.3.1	2020-06-01	CRAN (R 4.0.0)
highr	0.8	2019-03-20	CRAN (R 4.0.0)
hms	0.5.3	2020-01-08	CRAN (R 4.0.0)
htmltools	0.5.0	2020-06-16	CRAN (R 4.0.1)
httr	1.4.2	2020-07-20	CRAN (R 4.0.2)
IRanges	2.22.2	2020-05-21	Bioconductor
jsonlite	1.7.0	2020-06-25	CRAN (R 4.0.2)
knitr	1.29	2020-06-23	CRAN (R 4.0.2)
labeling	0.3	2014-08-23	CRAN (R 4.0.0)
lattice	0.20-41	2020-04-02	CRAN (R 4.0.2)
lifecycle	0.2.0	2020-03-06	CRAN (R 4.0.0)
locfit	1.5-9.4	2020-03-25	CRAN (R 4.0.0)
lubridate	1.7.9	2020-07-11	Github (tidyverse/lubridate@de2ee09)
magrittr	1.5.0.9000	2020-08-04	Github (tidyverse/magrittr@ba52096)
Matrix	1.2-18	2019-11-27	CRAN (R 4.0.2)
matrixStats	0.56.0	2020-03-13	CRAN (R 4.0.0)
memoise	1.1.0	2017-04-21	CRAN (R 4.0.0)
modelr	0.1.8.9000	2020-05-19	Github (tidyverse/modelr@16168e0)
munsell	0.5.0	2018-06-12	CRAN (R 4.0.0)
patchwork	1.0.1.9000	2020-06-22	Github (thomasp85/patchwork@82a5e03)
pillar	1.4.6.9000	2020-07-21	Github (r-lib/pillar@8aef8f2)
pkgbuild	1.1.0.9000	2020-07-14	Github (r-lib/pkgbuild@3a87bd9)
pkgconfig	2.0.3	2019-09-22	CRAN (R 4.0.0)
pkgload	1.1.0	2020-05-29	CRAN (R 4.0.0)
prettyunits	1.1.1	2020-01-24	CRAN (R 4.0.2)
processx	3.4.3	2020-07-05	CRAN (R 4.0.2)
ps	1.3.3	2020-05-08	CRAN (R 4.0.0)
purrr	0.3.4.9000	2020-08-06	Github (tidyverse/purrr@f9d4821)
R6	2.4.1.9000	2020-07-18	Github (r-lib/R6@1415d11)
RColorBrewer	1.1-2	2014-12-07	CRAN (R 4.0.0)
Rcpp	1.0.5	2020-07-06	CRAN (R 4.0.2)
RcppAnnoy	0.0.16	2020-03-08	CRAN (R 4.0.0)
RCurl	1.98-1.2	2020-04-18	CRAN (R 4.0.0)
readr	1.3.1.9000	2020-07-16	Github (tidyverse/readr@2ab51b2)
readxl	1.3.1.9000	2020-05-28	Github (tidyverse/readxl@3815961)
remotes	2.2.0.9000	2020-08-06	Github (r-lib/remotes@5a546ad)
reprex	0.3.0	2019-05-16	CRAN (R 4.0.0)
rlang	0.4.7.9000	2020-07-31	Github (r-lib/rlang@a144ac0)
rmarkdown	2.3.3	2020-07-25	Github (rstudio/rmarkdown@204aa41)
rprojroot	1.3-2	2018-01-03	CRAN (R 4.0.0)
RSpectra	0.16-0	2019-12-01	CRAN (R 4.0.2)
RSQLite	2.2.0	2020-01-07	CRAN (R 4.0.0)
rstudioapi	0.11.0-9000	2020-07-31	Github (rstudio/rstudioapi@aa17630)
Rtsne	0.16	2020-07-03	Github (jkrijthe/Rtsne@14b195f)
Rttf2pt1	1.3.8	2020-01-10	CRAN (R 4.0.0)
rvest	0.3.6	2020-07-25	CRAN (R 4.0.2)
S4Vectors	0.26.1	2020-05-16	Bioconductor
sass	0.2.0	2020-03-18	CRAN (R 4.0.2)
scales	1.1.1.9000	2020-07-24	Github (r-lib/scales@9ff4757)
sessioninfo	1.1.1.9000	2020-07-18	Github (r-lib/sessioninfo@791705b)
stringi	1.4.6	2020-02-17	CRAN (R 4.0.0)
stringr	1.4.0.9000	2020-06-01	Github (tidyverse/stringr@f70c4ba)
styler	1.3.2.9000	2020-07-25	Github (r-lib/styler@16d815e)
SummarizedExperiment	1.18.2	2020-07-09	Bioconductor
survival	3.2-3	2020-06-13	CRAN (R 4.0.2)
testthat	2.99.0.9000	2020-07-28	Github (r-lib/testthat@0af11cd)
tibble	3.0.3.9000	2020-07-21	Github (tidyverse/tibble@b4eec19)
tidyr	1.1.1.9000	2020-07-31	Github (tidyverse/tidyr@61e9209)
tidyselect	1.1.0.9000	2020-07-11	Github (tidyverse/tidyselect@69fdc96)
tidyverse	1.3.0.9000	2020-06-01	Github (hadley/tidyverse@8a0bb99)
usethis	1.6.1.9001	2020-08-06	Github (r-lib/usethis@51fcdb8)
uwot	0.1.8.9000	2020-08-03	Github (jlmelville/uwot@db9e397)
vctrs	0.3.2.9000	2020-08-04	Github (r-lib/vctrs@1b3cd7c)
viridisLite	0.3.0	2018-02-01	CRAN (R 4.0.0)
withr	2.2.0	2020-04-20	CRAN (R 4.0.0)
xfun	0.16	2020-07-24	CRAN (R 4.0.2)
XML	3.99-0.5	2020-07-23	CRAN (R 4.0.2)
xml2	1.3.2	2020-04-23	CRAN (R 4.0.0)
xtable	1.8-4	2019-04-21	CRAN (R 4.0.0)
XVector	0.28.0	2020-04-27	Bioconductor
yaml	2.2.1	2020-02-01	CRAN (R 4.0.0)
zlibbioc	1.34.0	2020-04-27	Bioconductor

Transcriptomic analysis of trophoblast-like cells derived from naïve human embryonic stem cells

JD

2020-08-09