Comparative Analysis of Single Cell Transcriptomes of Human Blastoids

Author

Jialei Duan

Published

Sun Sep 25, 2022 00:36:12-05:00

Doi
Abstract

Human blastoids provide a readily accessible, scalable, versatile and perturbable alternative to blastocysts for studying early human development, understanding early pregnancy loss and gaining insights into early developmental defects.


In this manuscript, several transcriptome data sets generated by different technologies were included. To minimize platform and processing differences, raw fastq files of public datasets using the Illumina sequencing platform were downloaded and re-processed.



[1] "2022-09-25 00:35:30 CDT"
[1] "America/Chicago"

Preparation

Functions

Load required packages.

source(
    file = file.path(
        SCRIPT_DIR,
        "utilities.R"
    )
)

load_matrix <- function(x) {
    matrix_readcount_use <- scipy$sparse$load_npz(
        file.path(x, "matrix_readcount.npz")
    )
    colnames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_barcodes.npy")
    )
    rownames(matrix_readcount_use) <- np$load(
        file.path(x, "matrix_readcount_features.npy")
    )

    return(matrix_readcount_use)
}

perform_pca <- function(matrix, features, min_counts = 1) {
    matrix_norm <- matrix[
        Matrix::rowSums(
            matrix
        ) >= min_counts,
    ]

    matrix_norm <- as(matrix_norm, "dgCMatrix")
    matrix_norm@x <- median(colSums(matrix_norm)) *
        (matrix_norm@x / rep.int(
            colSums(matrix_norm),
            diff(matrix_norm@p)
        ))

    matrix_norm_log <- matrix_norm[
        rownames(matrix_norm) %in% features,
    ]
    matrix_norm_log@x <- log1p(matrix_norm_log@x)

    # z-score
    matrix_norm_log_scaled <- t(
        scale(
            t(
                matrix_norm_log
            ),
            center = TRUE, scale = TRUE
        )
    )

    # PCA
    pca_out <- prcomp(
        t(matrix_norm_log_scaled),
        center = FALSE,
        scale = FALSE
    )

    return(pca_out)
}

Python env

np <- reticulate::import("numpy", convert = TRUE)
scipy <- reticulate::import(module = "scipy", convert = TRUE)
ad <- reticulate::import(module = "anndata", convert = TRUE)
reticulate::py_config()
python:         /Users/jialei/.pyenv/shims/python
libpython:      /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/libpython3.9.dylib
pythonhome:     /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10:/Users/jialei/.pyenv/versions/mambaforge-4.10.3-10
version:        3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:00:33)  [Clang 13.0.1 ]
numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy
numpy_version:  1.22.4
numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy

NOTE: Python version was forced by RETICULATE_PYTHON
cat("numpy:", np$`__version__`, "\n")
## numpy: 1.22.4
cat("scipy:", scipy$`__version__`, "\n")
## scipy: 1.9.1
cat("anndata:", ad$`__version__`, "\n")
## anndata: 0.8.0
PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Human_blastoid"

Matrix

MATRIX_DIR <- list(
    "github/data/matrices/LW36",
    "github/data/matrices/LW49_LW50_LW51_LW52",
    "github/data/matrices/LW58_LW59",
    "github/data/matrices/LW60_LW61",
    "raw/public/PRJEB11202/reformatted_matrix"
)

matrix_readcount_use <- purrr::map(MATRIX_DIR, \(x) {
    load_matrix(file.path(PROJECT_DIR, x))
}) |>
    purrr::reduce(cbind)

# clean up
rm(MATRIX_DIR)

Embedding

EMBEDDING_FILE <- "embedding.csv.gz"

embedding <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        EMBEDDING_FILE
    )
)

# clean up
rm(EMBEDDING_FILE)

Metadata

cell_metadata_PRJEB11202 <- read_delim(
    file.path(
        PROJECT_DIR,
        "raw/public/PRJEB11202/",
        "E-MTAB-3929.sdrf.tsv"
    ),
    delim = "\t"
) |>
    dplyr::select(
        `Comment[ENA_SAMPLE]`,
        `Comment[ENA_RUN]`,
        `Characteristics[developmental stage]`,
        `Characteristics[inferred lineage]`
    ) |>
    dplyr::rename(
        cell = `Comment[ENA_SAMPLE]`,
        run = `Comment[ENA_RUN]`,
        developmental_stage = `Characteristics[developmental stage]`,
        lineage = `Characteristics[inferred lineage]`
    ) |>
    dplyr::mutate(
        developmental_stage = stringr::str_replace(
            string = developmental_stage,
            pattern = "embryonic day ",
            replacement = "E"
        ),
        lineage = dplyr::case_when(
            lineage == "epiblast" ~ "EPI",
            lineage == "primitive endoderm" ~ "HYP",
            lineage == "trophectoderm" ~ "TE",
            lineage == "not applicable" ~ "Pre-lineage"
        )
    )

embedding <- embedding |>
    dplyr::left_join(
        cell_metadata_PRJEB11202
    ) |>
    dplyr::mutate(
        lineage = dplyr::case_when(
            is.na(lineage) ~ "Blastoid",
            TRUE ~ as.character(lineage)
        ),
        developmental_stage = dplyr::case_when(
            is.na(developmental_stage) ~ "Blastoid",
            TRUE ~ as.character(developmental_stage)
        )
    )


walk(list(matrix_readcount_use), \(x) {
    print(object.size(x), units = "auto", standard = "SI")
})
1.6 GB

Pre/post-implantation comparison

Blastoid

matrix_readcount_blastoid_pseudobulk <- purrr::map(
    sort(unique(embedding$louvain)), \(x) {
        cells_in_group <- embedding |>
            dplyr::filter(
                louvain == x,
                batch != "PRJEB11202"
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_use[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(sort(unique(embedding$louvain)))
EMBEDDING_FILE <- "embedding_stem.csv.gz"

embedding_stem <- read_csv(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        EMBEDDING_FILE
    )
)

matrix_readcount_stem_pseudobulk <- purrr::map(
    sort(unique(embedding_stem$batch)), \(x) {
        cells_in_group <- embedding_stem |>
            dplyr::filter(batch == x) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_use[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(sort(unique(embedding_stem$batch)))

Public datasets

Petropoulos et al

Single-cell RNA-Seq reveals lineage and X chromosome dynamics in human preimplantation embryos

matrix_readcount_PRJEB11202_pseudobulk_EPI <- purrr::map(
    c("E5", "E6", "E7"), \(x) {
        cells_in_group <- embedding |>
            dplyr::filter(
                lineage == "EPI",
                developmental_stage == x
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_use[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(c("E5", "E6", "E7")) |>
    as.matrix()

matrix_readcount_PRJEB11202_pseudobulk_TE <- purrr::map(
    c("E5", "E6", "E7"), \(x) {
        cells_in_group <- embedding |>
            dplyr::filter(
                lineage == "TE",
                developmental_stage == x
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_use[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(c("E5", "E6", "E7")) |>
    as.matrix()

matrix_readcount_PRJEB11202_pseudobulk_HYP <- purrr::map(
    c("E5", "E6", "E7"), \(x) {
        cells_in_group <- embedding |>
            dplyr::filter(
                lineage == "HYP",
                developmental_stage == x
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_use[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(c("E5", "E6", "E7")) |>
    as.matrix()

Xiang et al

A developmental landscape of 3D-cultured human pre-gastrulation embryos

adata_files <- c(
    file.path(
        MATRIX_DIR,
        "adata.h5ad"
    )
)

BACKED <- NULL
matrix_readcount_PRJNA562548 <- purrr::map(adata_files, \(x) {
    ad$read_h5ad(
        filename = x, backed = BACKED
    ) |>
        convert_adata()
}) |>
    purrr::reduce(cbind)
cell_metadata_PRJNA562548 <- vroom::vroom(
    file = file.path(
        MATRIX_DIR, "cell_metadata.csv"
    )
) |>
    dplyr::select(
        cell = `Sample Name`,
        developmental_stage = Age,
        lineage = Cell_type,
        run = Run
    ) |>
    dplyr::mutate(
        developmental_stage = stringr::str_replace(
            string = developmental_stage,
            pattern = "embryo invitro day ",
            replacement = "E"
        ),
        developmental_stage = factor(
            developmental_stage,
            levels = stringr::str_sort(
                x = unique(developmental_stage),
                numeric = TRUE
            )
        ),
        lineage = factor(
            lineage,
            levels = c(
                "ICM",
                "EPI",
                "PSA-EPI",
                "Hypoblast",
                "CTBs",
                "STBs",
                "EVTs"
            )
        )
    )
matrix_readcount_PRJNA562548_pseudobulk_EPI <- purrr::map(
    levels(cell_metadata_PRJNA562548$developmental_stage), \(x) {
        cells_in_group <- cell_metadata_PRJNA562548 |>
            dplyr::filter(
                lineage == "EPI",
                developmental_stage == x
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_PRJNA562548[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA562548$developmental_stage)) |>
    as.matrix()

matrix_readcount_PRJNA562548_pseudobulk_HYP <- purrr::map(
    levels(cell_metadata_PRJNA562548$developmental_stage), \(x) {
        cells_in_group <- cell_metadata_PRJNA562548 |>
            dplyr::filter(
                lineage == "Hypoblast",
                developmental_stage == x
            ) |>
            dplyr::pull(cell)

        if (length(cells_in_group) > 1) {
            m <- Matrix::rowSums(matrix_readcount_PRJNA562548[, cells_in_group])
        } else {
            m <- matrix_readcount_PRJNA562548[, cells_in_group]
        }

        return(m)
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(
        cell_metadata_PRJNA562548 |>
            dplyr::filter(lineage == "Hypoblast") |>
            dplyr::pull(developmental_stage) |>
            droplevels() |>
            levels()
    ) |>
    as.matrix()

# clean up
rm(matrix_readcount_PRJNA562548)
rm(cell_metadata_PRJNA562548)

Zhou et al

Reconstituting the transcriptome and DNA methylome landscapes of human implantation

adata_files <- c(
    file.path(
        MATRIX_DIR,
        "adata.h5ad"
    )
)

BACKED <- NULL
matrix_readcount_PRJNA431392 <- purrr::map(adata_files, \(x) {
    ad$read_h5ad(
        filename = x, backed = BACKED
    ) |>
        convert_adata()
}) |>
    purrr::reduce(cbind)
embryos_selected_PRJNA431392 <- c(
    "ha_D6_E2",
    "hm_D6_E1",
    "hm_D6_E2",
    "hm_D8_E2",
    "hm_D8_E3",
    "hm_D8_E5",
    "ha_D8_E1",
    "hm_D8_E1",
    "hv_D8_E1",
    "hv_D8_E2",
    "hv_D8_E3",
    "hv_D10_E6",
    "ha_D10_E1",
    "ha_D10_E2",
    "hm_D10_E4",
    "hm_D10_E9",
    "hv_D10_E7",
    "hv_D10_E8",
    "ha_D12_E1",
    "hv_D12_E1",
    "hv_D12_E2"
)

cell_metadata_PRJNA431392 <- vroom::vroom(
    file = file.path(
        MATRIX_DIR, "cell_metadata.csv"
    )
) |>
    dplyr::mutate(
        developmental_stage = str_replace(
            string = Day,
            pattern = "D",
            replacement = "E"
        ),
        developmental_stage = factor(
            developmental_stage,
            levels = stringr::str_sort(
                unique(developmental_stage),
                numeric = TRUE
            )
        ),
        lineage = factor(
            Lineage,
            levels = c(
                "EPI",
                "PE",
                "TE",
                "MIX"
            )
        ),
        `3184` = case_when(
            Ori_Day_Emb %in% embryos_selected_PRJNA431392 ~ "1",
            TRUE ~ "0"
        )
    ) |>
    dplyr::rename(cell = Sample)

cell_metadata_PRJNA431392 <- cell_metadata_PRJNA431392 |>
    dplyr::filter(`3184` == 1) |>
    dplyr::mutate(developmental_stage = droplevels(developmental_stage))
matrix_readcount_PRJNA431392_pseudobulk_EPI <- purrr::map(
    levels(cell_metadata_PRJNA431392$developmental_stage), \(x) {
        cells_in_group <- cell_metadata_PRJNA431392 |>
            dplyr::filter(
                developmental_stage == x,
                lineage == "EPI"
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_PRJNA431392[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA431392$developmental_stage)) |>
    as.matrix()

matrix_readcount_PRJNA431392_pseudobulk_TE <- purrr::map(
    levels(cell_metadata_PRJNA431392$developmental_stage), \(x) {
        cells_in_group <- cell_metadata_PRJNA431392 |>
            dplyr::filter(
                developmental_stage == x,
                lineage == "TE"
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_PRJNA431392[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA431392$developmental_stage)) |>
    as.matrix()

matrix_readcount_PRJNA431392_pseudobulk_HYP <- purrr::map(
    levels(cell_metadata_PRJNA431392$developmental_stage), \(x) {
        cells_in_group <- cell_metadata_PRJNA431392 |>
            dplyr::filter(
                developmental_stage == x,
                lineage == "PE"
            ) |>
            dplyr::pull(cell)

        Matrix::rowSums(matrix_readcount_PRJNA431392[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA431392$developmental_stage)) |>
    as.matrix()

# clean up
rm(matrix_readcount_PRJNA431392)
rm(cell_metadata_PRJNA431392)

Takashima et al

Resetting transcription factor control circuitry toward ground-state pluripotency in human

matrix_readcount_PRJEB7132 <- load_matrix(
    file.path(PROJECT_DIR, "raw/public/PRJEB7132")
)

matrix_readcount_PRJEB7132_pseudobulk <- cbind(
    H9 = Matrix::rowSums(
        matrix_readcount_PRJEB7132[, c("ERS537888", "ERS537890", "ERS537878")]
    ),
    H9_reset = Matrix::rowSums(
        matrix_readcount_PRJEB7132[, c("ERS537884", "ERS537881", "ERS537876")]
    )
) |>
    as.matrix()

# clean up
rm(matrix_readcount_PRJEB7132)

Dong et al

Derivation of trophoblast stem cells from naïve human pluripotent stem cells

matrix_readcount_PRJNA576801 <- load_matrix(
    file.path(PROJECT_DIR, "raw/public/PRJNA576801")
)

geo_info_PRJNA576801 <- tibble::tribble(
    ~sample_name, ~sample_annotation, ~group, ~cell_line,
    # "GSM4116149", "H9 primed TSC Bulk RNA-seq", "Primed hPSC (hTSC medium)", "H9",
    # "GSM4116150", "AN primed TSC Bulk RNA-seq", "Primed hPSC (hTSC medium)", "AN",
    "GSM4116151", "H9 primed Bulk RNA-seq", "Primed hPSC", "H9",
    "GSM4116152", "AN primed Bulk RNA-seq", "Primed hPSC", "AN",
    # "GSM4116153", "H9 primed StemPro Bulk RNA-seq", "Re-primed hPSC", "H9",
    # "GSM4116154", "H9 capacitated 1 Bulk RNA-seq", "Capacitated hPSC", "H9",
    # "GSM4116155", "H9 capacitated 2 Bulk RNA-seq", "Capacitated hPSC", "H9",
    "GSM4116156", "H9 naïve Bulk RNA-seq", "Naïve hPSC", "H9",
    "GSM4116157", "AN naïve Bulk RNA-seq", "Naïve hPSC", "AN",
    "GSM4116158", "H9 naïve TSC Bulk RNA-seq", "Naïve hTSC", "H9",
    "GSM4116159", "AN naïve TSC Bulk RNA-seq", "Naïve hTSC", "AN",
    "GSM4116160", "WIBR3 naïve TSC Bulk RNA-seq", "Naïve hTSC", "WIBR3",
    # "GSM4116161", "H9 EVT Bulk RNA-seq", "EVT", "H9",
    # "GSM4116162", "AN EVT Bulk RNA-seq", "EVT", "AN",
    # "GSM4116163", "H9 STB Bulk RNA-seq", "STB", "H9",
    # "GSM4116164", "AN STB Bulk RNA-seq", "STB", "AN",
    "GSM4276363", "H9 naïve TSC rep 2 Bulk RNA-seq", "Naïve hTSC", "H9",
    "GSM4276364", "AN naïve TSC rep 2 Bulk RNA-seq", "Naïve hTSC", "AN",
    # "GSM4276365", "BT5 hTSC rep 1 Bulk RNA-seq", "BT5 hTSCs", "BT5",
    # "GSM4276366", "BT5 hTSC rep 2 Bulk RNA-seq", "BT5 hTSCs", "BT5"
)

cell_metadata_PRJNA576801 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA576801",
        "SraRunTable.txt"
    ),
    delim = ","
) |>
    dplyr::select(
        sample_name = "Sample Name",
        run = "Run",
        biosample = "BioSample",
        source_name
    ) |>
    dplyr::mutate(
        source_name = stringr::str_remove(
            string = source_name, pattern = "\\s.+$"
        ),
        source_name = factor(
            source_name,
            levels = c("hPSC", "hTSC", "EVT", "STB"),
        )
    ) |>
    dplyr::left_join(geo_info_PRJNA576801) |>
    dplyr::mutate(cell_line = factor(cell_line)) |>
    tidyr::drop_na() |>
    dplyr::mutate(group = factor(group))
matrix_readcount_PRJNA576801_pseudobulk_lineage <- purrr::map(
    levels(cell_metadata_PRJNA576801$group), \(x) {
        cells_in_group <- cell_metadata_PRJNA576801 |>
            dplyr::filter(
                group == x
            ) |>
            dplyr::pull(sample_name)

        Matrix::rowSums(matrix_readcount_PRJNA576801[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA576801$group)) |>
    as.matrix()


colnames(matrix_readcount_PRJNA576801_pseudobulk_lineage) <-
    stringr::str_replace(
        string = colnames(matrix_readcount_PRJNA576801_pseudobulk_lineage),
        pattern = " ", replacement = "_"
    )

rm(matrix_readcount_PRJNA576801)
rm(geo_info_PRJNA576801)
rm(cell_metadata_PRJNA576801)

Cinkornpumin et al

Naïve human embryonic stem cells can give rise to cells with a trophoblast-like transcriptome and methylome

matrix_readcount_PRJNA638350 <- load_matrix(
    file.path(PROJECT_DIR, "raw/public/PRJNA638350")
)

geo_info_PRJNA638350 <- tibble::tribble(
    ~cell, ~sample_description, ~source_name2, ~group, ~cell_line,
    "GSM4603117", "BT2 hTSC Rep1", "BT2 hTSC", "hTSC", "BT2",
    "GSM4603118", "BT2 hTSC Rep2", "BT2 hTSC", "hTSC", "BT2",
    "GSM4603119", "BT2 hTSC Rep3", "BT2 hTSC", "hTSC", "BT2",
    "GSM4603120", "BT2 hTSC Rep4", "BT2 hTSC", "hTSC", "BT2",
    "GSM4603121", "CT1 hTSCs Rep1", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603122", "CT1 hTSCs Rep2", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603123", "CT1 hTSCs Rep3", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603124", "CT1 hTSCs Rep4", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603125", "CT1 hTSCs Rep5", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603126", "CT1 hTSCs Rep6", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603127", "CT1 hTSCs Rep7", "CT1 hTSC", "hTSC", "CT1",
    "GSM4603128", "CT3 hTSCs Rep1", "CT3 hTSC", "hTSC", "CT3",
    "GSM4603129", "CT3 hTSCs Rep2", "CT3 hTSC", "hTSC", "CT3",
    "GSM4603130", "CT3 hTSCs Rep3", "CT3 hTSC", "hTSC", "CT3",
    "GSM4603131", "CT3 hTSCs Rep4", "CT3 hTSC", "hTSC", "CT3",
    # "GSM4603132", "UCLA1 Primed hESCs Rep1", "UCLA1 Primed hESC", "Primed hESC", "UCLA1",
    # "GSM4603133", "UCLA1 Primed hESCs Rep2", "UCLA1 Primed hESC", "Primed hESC", "UCLA1",
    # "GSM4603134", "UCLA1 Primed hESCs Rep3", "UCLA1 Primed hESC", "Primed hESC", "UCLA1",
    "GSM4603135", "WIBR3 Primed hESCs Rep 1", "WIBR3 Primed hESC", "Primed hESC", "WIBR3",
    "GSM4603136", "WIBR3 Primed hESCs Rep 2", "WIBR3 Primed hESC", "Primed hESC", "WIBR3",
    "GSM4603137", "WIBR3 Primed hESCs Rep 3", "WIBR3 Primed hESC", "Primed hESC", "WIBR3",
    "GSM4603138", "WIBR3 Primed hESCs Rep 4", "WIBR3 Primed hESC", "Primed hESC", "WIBR3",
    # "GSM4603139", "WIBR3 Naive hESCs Rep 1", "WIBR3 Naive hESC", "Primed hESC", "WIBR3",
    # "GSM4603140", "WIBR3 Naive hESCs Rep 2", "WIBR3 Naive hESC", "Primed hESC", "WIBR3",
    "GSM4603139", "WIBR3 Naive hESCs Rep 1", "WIBR3 Naive hESC", "Naive hESC", "WIBR3",
    "GSM4603140", "WIBR3 Naive hESCs Rep 2", "WIBR3 Naive hESC", "Naive hESC", "WIBR3",
    "GSM4603141", "WIBR3 tdhTSC Line 1 Rep 1", "WIBR3 tdhTSC", "tdhTSC", "WIBR3",
    "GSM4603142", "WIBR3 tdhTSC Line 1 Rep 2", "WIBR3 tdhTSC", "tdhTSC", "WIBR3",
    "GSM4603143", "WIBR3 tdhTSC Line 2 Rep1", "WIBR3 tdhTSC", "tdhTSC", "WIBR3",
    "GSM4603144", "WIBR3 tdhTSC Line 2 Rep 2", "WIBR3 tdhTSC", "tdhTSC", "WIBR3",
    "GSM4603145", "WIBR3 tdhTSC Line 3 Rep 1", "WIBR3 tdhTSC", "tdhTSC", "WIBR3",
    "GSM4603146", "UCLA1 tdhTSC Line 1 Rep 1", "UCLA1 tdhTSC", "tdhTSC", "UCLA1",
    "GSM4603147", "UCLA1 tdhTSC Line 1 Rep 2", "UCLA1 tdhTSC", "tdhTSC", "UCLA1",
    "GSM4603148", "FT190", "FT190", "FT190", "FT190",
    "GSM4603149", "Hec116", "Hec116", "Hec116", "Hec116"
)

cell_metadata_PRJNA638350 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA638350",
        "SraRunTable.txt"
    ),
    delim = ","
) |>
    dplyr::rename_with(~ tolower(gsub(" ", "_", .x, fixed = TRUE))) |>
    dplyr::select(
        cell = sample_name,
        run,
        biosample,
        source_name,
        cell_type
    ) |>
    dplyr::left_join(geo_info_PRJNA638350, by = "cell") |>
    dplyr::mutate(
        group = factor(
            group,
            levels = c("Naive hESC", "Primed hESC", "tdhTSC", "hTSC", "FT190", "Hec116")
        )
    ) |>
    tidyr::drop_na()
matrix_readcount_PRJNA638350_pseudobulk_lineage <- purrr::map(
    levels(cell_metadata_PRJNA638350$group), \(x) {
        cells_in_group <- cell_metadata_PRJNA638350 |>
            dplyr::filter(
                group == x
            ) |>
            dplyr::pull(cell)

        if (length(cells_in_group) > 1) {
            m <- Matrix::rowSums(matrix_readcount_PRJNA638350[, cells_in_group])
        } else {
            m <- matrix_readcount_PRJNA638350[, cells_in_group]
        }
        return(m)
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA638350$group)) |>
    as.matrix()

colnames(matrix_readcount_PRJNA638350_pseudobulk_lineage) <-
    stringr::str_replace(
        colnames(matrix_readcount_PRJNA638350_pseudobulk_lineage),
        pattern = " ", replacement = "_"
    )

# clean up
rm(matrix_readcount_PRJNA638350)
rm(geo_info_PRJNA638350)
rm(cell_metadata_PRJNA638350)

Lv et al

Single-cell RNA sequencing reveals regulatory mechanism for trophoblast cell-fate divergence in human peri-implantation conceptuses

matrix_readcount_PRJNA516921 <- load_matrix(
    file.path(PROJECT_DIR, "raw/public/PRJNA516921")
)

geo_info_PRJNA516921 <- read_delim(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA516921",
        "notebooks",
        "sample.csv"
    ),
    delim = ","
)

cell_metadata_PRJNA516921 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA516921",
        "SraRunTable.txt"
    ),
    delim = ","
) |>
    dplyr::rename_with(~ tolower(gsub(" ", "_", .x, fixed = TRUE))) |>
    dplyr::select(
        cell = sample_name,
        run,
        biosample,
        development_day,
        source_name,
        stage
    ) |>
    dplyr::left_join(
        geo_info_PRJNA516921 |>
            select(Accession, Title),
        by = c("cell" = "Accession")
    ) |>
    dplyr::rename_all(tolower) |>
    dplyr::rename(
        developmental_stage = "development_day",
    ) |>
    dplyr::mutate(
        developmental_stage = stringr::str_replace(
            string = developmental_stage,
            pattern = "day",
            replacement = "Day "
        ),
        developmental_stage = factor(
            developmental_stage,
            levels = developmental_stage |>
                unique() |>
                str_sort(numeric = TRUE)
        ),
        stage = stringr::str_to_title(string = stage),
        stage = factor(
            stage,
            levels = c(
                "Blastocyst", "Implantation",
                "Post-Implantation", "Endometrial"
            )
        )
    )

cell_metadata_PRJNA516921 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA516921",
        "exploring/embedding_ncomponents14_seed20200317.csv"
    )
) |>
    dplyr::left_join(
        cell_metadata_PRJNA516921,
        by = c("cell" = "cell")
    ) |>
    dplyr::select(
        cell:y_umap, run:dplyr::last_col()
    ) |>
    dplyr::filter(
        !(
            louvain == 3 | (
                developmental_stage == "Day 6" & louvain == 1
            ) | (
                developmental_stage %in% c("Day 13", "Day 14")
            )
        )
    ) |>
    dplyr::mutate(
        lineage = dplyr::case_when(
            cell %in% c(
                "GSM3578305",
                "GSM3578308",
                "GSM3578339",
                "GSM3578505",
                "GSM3578651",
                "GSM3578653",
                "GSM3578682",
                "GSM3578696",
                "GSM3578697",
                "GSM3578707",
                "GSM3578729"
            ) ~ "PE",
            cell %in% c(
                "GSM3578300",
                "GSM3578302",
                "GSM3578365",
                "GSM3578492",
                "GSM3578515",
                "GSM3578666",
                "GSM3578668",
                "GSM3578714",
                "GSM3578716",
                "GSM3578728"
            ) ~ "EPI",
            TRUE ~ "TE"
        ),
        lineage = factor(
            lineage,
            levels = c("EPI", "PE", "TE")
        )
    )
matrix_readcount_PRJNA516921_pseudobulk_TE <- purrr::map(
    levels(cell_metadata_PRJNA516921$developmental_stage), \(x) {
        cells_in_group <- cell_metadata_PRJNA516921 |>
            dplyr::filter(
                lineage == "TE",
                developmental_stage == x
            ) |>
            dplyr::pull(cell)

        if (length(cells_in_group) > 1) {
            m <- Matrix::rowSums(matrix_readcount_PRJNA516921[, cells_in_group])
        } else {
            m <- matrix_readcount_PRJNA516921[, cells_in_group]
        }
        return(m)
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(cell_metadata_PRJNA516921 |>
        dplyr::filter(lineage == "TE") |>
        dplyr::pull(developmental_stage) |>
        droplevels() |>
        levels()) |>
    as.matrix()

rm(matrix_readcount_PRJNA516921)
rm(geo_info_PRJNA516921)
rm(cell_metadata_PRJNA516921)

Linneberg-Agerholm et al

Naïve human pluripotent stem cells respond to Wnt, Nodal and LIF signalling to produce expandable naïve extra-embryonic endoderm

matrix_readcount_PRJNA574150 <- load_matrix(
    file.path(PROJECT_DIR, "raw/public/PRJNA574150")
)

geo_info_PRJNA574150 <- tibble::tribble(
    ~cell, ~sample_description,
    "GSM4096610", "H9_pES_R1",
    "GSM4096611", "H9_pES_R2",
    "GSM4096612", "H9_t2iLGo_nES_R2",
    "GSM4096613", "H9_t2iLGo_nES_R1",
    "GSM4096614", "H9_t2iLGo_nES_R3",
    "GSM4096615", "H9_h2iL_nES_R1",
    "GSM4096616", "H9_h2iL_nES_R2",
    "GSM4096617", "H9_h2iL_nES_R3",
    "GSM4096618", "H9_t2iLGo_PrE_R2",
    "GSM4096619", "H9_t2iLGo_PrE_R3",
    "GSM4096620", "H9_t2iLGo_PrE_R1",
    "GSM4096621", "H9_h2iL_PrE_R2",
    "GSM4096622", "H9_h2iL_PrE_R3",
    "GSM4096623", "H9_h2iL_PrE_R1",
    "GSM4096624", "HUES4_pES_R1",
    "GSM4096625", "HUES4_pES_R2",
    "GSM4096626", "H9_ADE_R1",
    "GSM4096627", "H9_ADE_R2",
    "GSM4096628", "HUES4_ADE_R2",
    "GSM4096629", "HUES4_ADE_R1",
    "GSM4096630", "H9_RSeT_nES_R1",
    "GSM4096631", "H9_RSeT_nES_R2",
    "GSM4096632", "H9_t2iLGo_nEnd_R1",
    "GSM4096633", "H9_t2iLGo_nEnd_R2",
    "GSM4096634", "H9_t2iLGo_nEnd_R3",
    "GSM4096635", "H9_RSeT_PrE_R1-1",
    "GSM4096636", "H9_RSeT_PrE_R2-1",
    "GSM4096637", "H9_DE_R1",
    "GSM4096638", "H9_DE_R2",
    "GSM4096639", "H9_DE_R3",
    "GSM4096640", "H9_RSeT_PrE_R1-2",
    "GSM4096641", "H9_RSeT_PrE_R2-2",
    "GSM4096642", "H9_RSeT_nEnd_R1",
    "GSM4096643", "H9_RSeT_nEnd_R2",
    "GSM4096644", "H9_RSeT_nEnd_R3"
)

cell_metadata_PRJNA574150 <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA574150",
        "SraRunTable.txt"
    ),
    delim = ","
) |>
    dplyr::rename_with(~ tolower(gsub(" ", "_", .x, fixed = TRUE))) |>
    dplyr::select(
        cell = sample_name,
        run,
        biosample,
        cell_type,
        background,
        source_name,
        stage
    ) |>
    dplyr::left_join(geo_info_PRJNA574150, by = "cell") |>
    dplyr::mutate(
        group = stringr::str_remove(
            string = sample_description,
            pattern = "_R\\d.*$"
        ),
        lineage = stringr::str_remove(
            string = group, pattern = ".+_"
        ),
        lineage = factor(
            lineage,
            levels = c("nES", "pES", "PrE", "nEnd", "DE", "ADE")
        )
    ) |>
    dplyr::rename(cell_line = "background")
matrix_readcount_PRJNA574150_pseudobulk_lineage <- purrr::map(
    levels(cell_metadata_PRJNA574150$lineage), \(x) {
        cells_in_group <- cell_metadata_PRJNA574150 |>
            dplyr::filter(
                lineage == x,
            ) |>
            dplyr::pull(cell)

        if (length(cells_in_group) > 1) {
            m <- Matrix::rowSums(matrix_readcount_PRJNA574150[, cells_in_group])
        } else {
            m <- matrix_readcount_PRJNA574150[, cells_in_group]
        }
        return(m)
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(cell_metadata_PRJNA574150$lineage)) |>
    as.matrix()

rm(matrix_readcount_PRJNA574150)
rm(geo_info_PRJNA574150)
rm(cell_metadata_PRJNA574150)

Principal component analysis

features_hg_cy <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "github",
        "data",
        "features_hg_cy.csv.gz"
    )
)
# prepare features
table_s3_sheet1 <- readxl::read_excel(
    path = file.path(
        PROJECT_DIR,
        "docs",
        "Reconstituting_the_transcriptome_and_DNA_methylome_landscapes_of_human_implantation",
        "41586_2019_1500_MOESM3_ESM",
        "Supplementary Table 3 Lineage_Markers.xlsx"
    ),
    sheet = "Sheet1"
)
table_s3_sheet1 |>
    head() |>
    knitr::kable()
gene myAUC avg_diff power pct.1 pct.2 cluster
DPPA5 0.987 4.287821 0.974 0.985 0.247 EPI
IFITM1 0.987 3.334143 0.974 0.997 0.593 EPI
POU5F1 0.986 3.814455 0.972 0.988 0.355 EPI
KHDC3L 0.970 4.646703 0.940 0.958 0.123 EPI
TDGF1 0.970 3.947136 0.940 0.958 0.202 EPI
ESRG 0.964 3.447912 0.928 0.961 0.393 EPI
features_hg_EPI <- rownames(matrix_readcount_use)[
    stringr::str_remove(
        string = rownames(matrix_readcount_use),
        pattern = "^E.+_"
    ) %in% (table_s3_sheet1 |>
        dplyr::filter(
            cluster == "EPI"
        ) |>
        dplyr::pull(gene))
] |>
    unique()

features_hg_cy_EPI_hg <- features_hg_cy |>
    dplyr::filter(hg %in% features_hg_EPI) |>
    dplyr::pull(hg)

features_hg_cy_EPI_cy <- features_hg_cy |>
    dplyr::filter(hg %in% features_hg_EPI) |>
    dplyr::pull(cy)
features_hg_TE <- rownames(matrix_readcount_use)[
    stringr::str_remove(
        string = rownames(matrix_readcount_use),
        pattern = "^E.+_"
    ) %in% (table_s3_sheet1 |>
        dplyr::filter(
            cluster == "TE"
        ) |>
        dplyr::pull(gene))
] |>
    unique()

features_hg_cy_TE_hg <- features_hg_cy |>
    dplyr::filter(hg %in% features_hg_TE) |>
    dplyr::pull(hg)

features_hg_cy_TE_cy <- features_hg_cy |>
    dplyr::filter(hg %in% features_hg_TE) |>
    dplyr::pull(cy)
features_hg_HYP <- rownames(matrix_readcount_use)[
    stringr::str_remove(
        string = rownames(matrix_readcount_use),
        pattern = "^E.+_"
    ) %in% (table_s3_sheet1 |>
        dplyr::filter(
            cluster == "PE"
        ) |>
        dplyr::pull(gene))
] |>
    unique()

features_hg_cy_HYP_hg <- features_hg_cy |>
    dplyr::filter(hg %in% features_hg_HYP) |>
    dplyr::pull(hg)

features_hg_cy_HYP_cy <- features_hg_cy |>
    dplyr::filter(hg %in% features_hg_HYP) |>
    dplyr::pull(cy)

EPI

# z-score
clusters_selected_blastoid <- c("2", "3", "11")

matrix_readcount_pca <- cbind(
    matrix_readcount_blastoid_pseudobulk[, clusters_selected_blastoid],
    #
    matrix_readcount_stem_pseudobulk[, c("LW50"), drop = FALSE],
    #
    matrix_readcount_PRJEB11202_pseudobulk_EPI,
    matrix_readcount_PRJNA562548_pseudobulk_EPI,
    matrix_readcount_PRJNA431392_pseudobulk_EPI,
    matrix_readcount_PRJEB7132_pseudobulk,
    matrix_readcount_PRJNA576801_pseudobulk_lineage[
        , c("Naïve_hPSC", "Primed_hPSC")
    ],
    matrix_readcount_PRJNA638350_pseudobulk_lineage[
        , c("Naive_hESC", "Primed_hESC")
    ]
)

colnames(matrix_readcount_pca) <- paste(
    rep(
        c(
            "Blastoid",
            #
            "Blastoid",
            #
            "PRJEB11202",
            "PRJNA562548",
            "PRJNA431392",
            "PRJEB7132",
            "PRJNA576801",
            "PRJNA638350"
        ),
        times = c(
            length(clusters_selected_blastoid),
            #
            ncol(matrix_readcount_stem_pseudobulk[, c("LW50"), drop = FALSE]),
            #
            ncol(matrix_readcount_PRJEB11202_pseudobulk_EPI),
            ncol(matrix_readcount_PRJNA562548_pseudobulk_EPI),
            ncol(matrix_readcount_PRJNA431392_pseudobulk_EPI),
            ncol(matrix_readcount_PRJEB7132_pseudobulk),
            length(c("Naïve_hPSC", "Primed_hPSC")),
            length(c("Naive_hESC", "Primed_hESC"))
        )
    ),
    colnames(matrix_readcount_pca),
    sep = ":"
)
pca_out_EPI <- perform_pca(
    matrix = matrix_readcount_pca,
    features = features_hg_cy_EPI_hg,
    min_counts = 1
)

embedding_pca_EPI <- pca_out_EPI$x |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample_name") |>
    dplyr::select(sample_name:PC3) |>
    dplyr::rename(
        x_pca = PC1,
        y_pca = PC2,
        z_pca = PC3
    ) |>
    dplyr::relocate(x_pca, y_pca, z_pca, .after = dplyr::last_col()) |>
    dplyr::mutate(
        lineage = stringr::str_remove(string = sample_name, pattern = "^P.+:"),
        dataset = stringr::str_remove(string = sample_name, pattern = ":.+$")
    )

TE

clusters_selected_blastoid <- c("0", "1", "8", "9", "12", "17")

matrix_readcount_pca <- cbind(
    matrix_readcount_blastoid_pseudobulk[, clusters_selected_blastoid],
    #
    matrix_readcount_stem_pseudobulk[, c("LW52"), drop = FALSE],
    #
    matrix_readcount_PRJEB11202_pseudobulk_TE,
    matrix_readcount_PRJNA431392_pseudobulk_TE,
    #
    matrix_readcount_PRJNA576801_pseudobulk_lineage,
    matrix_readcount_PRJNA638350_pseudobulk_lineage[
        ,
        c("Naive_hESC", "Primed_hESC", "tdhTSC", "hTSC")
    ],
    matrix_readcount_PRJNA516921_pseudobulk_TE
)

# name columns
colnames(matrix_readcount_pca) <- paste(
    rep(
        c(
            "Blastoid",
            #
            "Blastoid",
            #
            "PRJEB11202",
            "PRJNA431392",
            #
            "PRJNA576801",
            "PRJNA638350",
            "PRJNA516921"
        ),
        times = c(
            ncol(matrix_readcount_blastoid_pseudobulk[
                ,
                clusters_selected_blastoid
            ]),
            #
            ncol(matrix_readcount_stem_pseudobulk[, c("LW52"), drop = FALSE]),
            #
            ncol(matrix_readcount_PRJEB11202_pseudobulk_TE),
            ncol(matrix_readcount_PRJNA431392_pseudobulk_TE),
            #
            ncol(matrix_readcount_PRJNA576801_pseudobulk_lineage),
            length(c("Naive_hESC", "Primed_hESC", "tdhTSC", "hTSC")),
            ncol(matrix_readcount_PRJNA516921_pseudobulk_TE)
        )
    ),
    colnames(matrix_readcount_pca),
    sep = ":"
)

colnames(matrix_readcount_pca) <- stringr::str_replace_all(
    string = colnames(matrix_readcount_pca),
    pattern = " ",
    replacement = "_"
)

matrix_readcount_pca <- matrix_readcount_pca[
    , colnames(matrix_readcount_pca) %in% c(
        "Blastoid:0",
        "Blastoid:1",
        "Blastoid:8",
        "Blastoid:9",
        "Blastoid:12",
        "Blastoid:17",
        "Blastoid:LW52",
        "PRJEB11202:E5",
        "PRJEB11202:E6",
        "PRJEB11202:E7",
        "PRJNA431392:E6",
        "PRJNA431392:E8",
        "PRJNA431392:E10",
        "PRJNA431392:E12",
        "PRJNA576801:Naïve_hTSC",
        "PRJNA638350:tdhTSC",
        "PRJNA638350:hTSC",
        "PRJNA516921:Day_6",
        "PRJNA516921:Day_7",
        "PRJNA516921:Day_8",
        "PRJNA516921:Day_9",
        "PRJNA516921:Day_10"
    )
]
pca_out_TE <- perform_pca(
    matrix = matrix_readcount_pca,
    features = features_hg_cy_TE_hg,
    min_counts = 1
)

embedding_pca_TE <- pca_out_TE$x |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample_name") |>
    dplyr::select(sample_name:PC3) |>
    dplyr::rename(
        x_pca = PC1,
        y_pca = PC2,
        z_pca = PC3
    ) |>
    dplyr::relocate(x_pca, y_pca, z_pca, .after = dplyr::last_col()) |>
    dplyr::mutate(
        lineage = stringr::str_remove(string = sample_name, pattern = "^P.+:"),
        dataset = stringr::str_remove(string = sample_name, pattern = ":.+$")
    )

HYP

clusters_selected_blastoid <- c("18", "15", "16", "13")

matrix_readcount_pca <- cbind(
    matrix_readcount_blastoid_pseudobulk[
        ,
        clusters_selected_blastoid,
        drop = FALSE
    ],
    #
    matrix_readcount_stem_pseudobulk[, c("LW51"), drop = FALSE],
    #
    matrix_readcount_PRJEB11202_pseudobulk_HYP,
    matrix_readcount_PRJNA562548_pseudobulk_HYP,
    matrix_readcount_PRJNA431392_pseudobulk_HYP,
    #
    matrix_readcount_PRJNA574150_pseudobulk_lineage[, "nEnd", drop = FALSE]
)

# name columns
colnames(matrix_readcount_pca) <- paste(
    rep(
        c(
            "Blastoid",
            #
            "Blastoid",
            #
            "PRJEB11202",
            "PRJNA562548",
            "PRJNA431392",
            #
            "PRJNA574150"
        ),
        times = c(
            length(clusters_selected_blastoid),
            #
            ncol(matrix_readcount_stem_pseudobulk[, c("LW51"), drop = FALSE]),
            #
            ncol(matrix_readcount_PRJEB11202_pseudobulk_HYP),
            ncol(matrix_readcount_PRJNA562548_pseudobulk_HYP),
            ncol(matrix_readcount_PRJNA431392_pseudobulk_HYP),
            #
            length("nEnd")
        )
    ),
    colnames(matrix_readcount_pca),
    sep = ":"
)

colnames(matrix_readcount_pca) <- stringr::str_replace_all(
    string = colnames(matrix_readcount_pca),
    pattern = " ",
    replacement = "_"
)
pca_out_HYP <- perform_pca(
    matrix = matrix_readcount_pca,
    features = features_hg_cy_HYP_hg,
    min_counts = 1
)

embedding_pca_HYP <- pca_out_HYP$x |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "sample_name") |>
    dplyr::select(sample_name:PC3) |>
    dplyr::rename(
        x_pca = PC1,
        y_pca = PC2,
        z_pca = PC3
    ) |>
    dplyr::relocate(x_pca, y_pca, z_pca, .after = dplyr::last_col()) |>
    dplyr::mutate(
        lineage = stringr::str_remove(string = sample_name, pattern = "^P.+:"),
        dataset = stringr::str_remove(string = sample_name, pattern = ":.+$")
    )

Visualization, joint

# assign colors

studies <- tibble::tribble(
    ~bioproject, ~description,
    "Blastoid", "Blastoid",
    "PRJEB11202", "Petropoulos et al., 2016",
    "PRJEB7132", "Takashima et al., 2014",
    "PRJNA431392", "Zhou et al., 2019",
    "PRJNA516921", "lv et al., 2019",
    "PRJNA562548", "Xiang et al., 2020",
    "PRJNA574150", "Linneberg-Agerholm et al., 2019",
    "PRJNA576801", "Dong et al., 2020",
    "PRJNA638350", "Cinkornpumin et al., 2020"
) |>
    mutate(
        color = c(
            "#F8766D",
            "#D39200",
            "#93AA00",
            "#00BA38",
            "#00C19F",
            "#00B9E3",
            "#619CFF",
            "#DB72FB",
            "#FF61C3"
        )
    )
embedding_pca_EPI <- embedding_pca_EPI |>
    dplyr::mutate(dataset = factor(dataset, levels = studies$bioproject))

embedding_pca_HYP <- embedding_pca_HYP |>
    dplyr::mutate(
        y_pca = 0 - y_pca,
        dataset = factor(dataset, levels = studies$bioproject),
    )

embedding_pca_TE <- embedding_pca_TE |>
    dplyr::mutate(
        dataset = factor(dataset, levels = studies$bioproject),
        lineage = stringr::str_replace(
            string = lineage, pattern = "Day_", replacement = "E"
        )
    )
# compose

p1 <- plot_pca_embedding(
    data = embedding_pca_EPI,
    x = x_pca,
    y = y_pca,
    color = dataset,
    title = "EPI; PCA",
    legend_x = 0.5,
    legend_y = 0.995,
    min_segment_length = 0
) +
    add_xy_label_pca(pca_out_EPI) +
    ggplot2::scale_color_manual(
        values = studies$color,
        breaks = studies$bioproject,
        labels = studies$description,
        drop = FALSE
    ) +
    ggplot2::theme(legend.position = "bottom") +
    ggplot2::guides(
        colour = ggplot2::guide_legend(
            override.aes = list(size = 2.5), ncol = 2,
        )
    )
ab <- coef(
    MASS::rlm(y_pca ~ x_pca,
        data = embedding_pca_EPI |>
            dplyr::filter(dataset != "Blastoid")
    )
)
p1 <- p1 + ggplot2::geom_abline(
    intercept = ab[1], slope = ab[2], color = "#8ace7e", size = 0.2
)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0. Please use
`linewidth` instead.
ℹ The deprecated feature was likely used in the ggplot2 package.
  Please report the issue at <https://github.com/tidyverse/ggplot2/issues>.
p2 <- plot_pca_embedding(
    data = embedding_pca_HYP,
    x = y_pca,
    y = x_pca,
    color = dataset,
    title = "HYP; PCA",
    legend_x = 0.5,
    legend_y = 0.995,
    min_segment_length = 0
) +
    add_xy_label_pca(pca_out_HYP, "PC2", "PC1") +
    ggplot2::scale_color_manual(
        values = studies$color,
        breaks = studies$bioproject,
        labels = studies$description,
        drop = FALSE
    ) +
    ggplot2::theme(legend.position = "bottom") +
    ggplot2::guides(
        colour = ggplot2::guide_legend(
            override.aes = list(size = 2.5), ncol = 2
        )
    )
ab <- coef(
    MASS::rlm(x_pca ~ y_pca,
        data = embedding_pca_HYP |>
            dplyr::filter(dataset != "Blastoid")
    )
)
p2 <- p2 + ggplot2::geom_abline(
    intercept = ab[1], slope = ab[2],
    color = "#ff684c", linewidth = 0.2
)

p3 <- plot_pca_embedding(
    data = embedding_pca_TE,
    x = x_pca,
    y = y_pca,
    color = dataset,
    title = "TE; PCA",
    legend_x = 0.5,
    legend_y = 0.995,
    min_segment_length = 0
) +
    add_xy_label_pca(pca_out_TE) +
    ggplot2::scale_color_manual(
        values = studies$color,
        breaks = studies$bioproject,
        labels = studies$description,
        drop = FALSE
    ) +
    ggplot2::theme(legend.position = "bottom") +
    ggplot2::guides(
        colour = ggplot2::guide_legend(
            override.aes = list(size = 2.5), ncol = 2
        )
    )
ab <- coef(
    MASS::rlm(y_pca ~ x_pca,
        data = embedding_pca_TE |>
            dplyr::filter(dataset != "Blastoid")
    )
)
p3 <- p3 + ggplot2::geom_abline(
    intercept = ab[1], slope = ab[2],
    color = "#9467bd", size = 0.2
)
list(p1 + p2 + p3) |>
    purrr::reduce(`+`) +
    patchwork::plot_layout(ncol = 1, guides = "collect") +
    patchwork::plot_annotation(theme = theme(plot.margin = margin())) &
    theme(legend.position = "bottom", legend.direction = "horizontal")

Principal component analysis (PCA) of blastoid-derived ELCs, HLCs and TLCs relative to published maps from human embryos and established human PSC lines

Comparison w/ cynomolgus monkeys


A developmental coordinate of pluripotency among mice, monkeys and humans

Data loading

color_palette_cluster <- c(
    "0" = "#8DD3C7",
    "1" = "#9EDAE5FF",
    "2" = "#BEBADA",
    "3" = "#FB8072",
    "4" = "#80B1D3",
    "5" = "#FDB462",
    "6" = "#B3DE69",
    "7" = "#FCCDE5",
    "8" = "#DC863B",
    "9" = "#BC80BD",
    "10" = "#11c638",
    "11" = "#BCBD22FF",
    "12" = "#17BECFFF",
    "13" = "#AEC7E8FF",
    "14" = "#EAD3C6",
    "15" = "#98DF8AFF",
    "16" = "#FF9896FF",
    "17" = "#C5B0D5FF",
    "18" = "#C49C94FF",
    "19" = "#F7B6D2FF",
    "20" = "#D33F6A",
    "21" = "#8E063B",
    "22" = "#023FA5"
)

# prepare matrix
matrix_cpm_PRJNA301445_cy <- load_matrix(
    file.path(PROJECT_DIR, "raw/public/PRJNA301445/preprocessed/cy")
)

embedding_PRJNA301445_cy <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/public/PRJNA301445/Comparison/cy_EPI/embedding_cy.csv"
    )
) |>
    dplyr::mutate(
        group = factor(
            group,
            levels = c(
                "ICM",
                "Pre-EPI",
                "Hypoblast",
                "PreE-TE",
                "PreL-TE",
                "PostE-EPI",
                "PostL-EPI",
                "Gast1",
                "Gast2a",
                "Gast2b",
                "Post-paTE",
                "VE/YE",
                "EXMC",
                "cyESCFF",
                "cyESCoF"
                # "E5.5EPI",
                # "E5.5VE",
                # "E6.5EPI-T hi",
                # "E6.5EPI-T lo",
                # "E6.5VE",
                # "EpiLC",
                # "2i+L ESC"
            )
        )
    )
matrix_cpm_PRJNA301445_cy_pseudobulk_lineage <- purrr::map(
    levels(embedding_PRJNA301445_cy$group), \(x) {
        cells_in_group <- embedding_PRJNA301445_cy |>
            dplyr::filter(
                group == x
            ) |>
            dplyr::pull(cell)

        Matrix::rowMeans(matrix_cpm_PRJNA301445_cy[, cells_in_group])
    }
) |>
    purrr::reduce(cbind) |>
    `colnames<-`(levels(embedding_PRJNA301445_cy$group)) |>
    as.matrix()

rm(matrix_cpm_PRJNA301445_cy)
rm(embedding_PRJNA301445_cy)
# prepare features
features_hg_cy_EPI_HYP_TE_hg <- features_hg_cy |>
    dplyr::filter(
        hg %in% (
            c(
                features_hg_cy_EPI_hg,
                features_hg_cy_HYP_hg,
                features_hg_cy_TE_hg
            ) |> unique()
        )
    ) |>
    dplyr::pull(hg)

features_hg_cy_EPI_HYP_TE_cy <- features_hg_cy |>
    dplyr::filter(
        hg %in% (
            c(
                features_hg_cy_EPI_hg,
                features_hg_cy_HYP_hg,
                features_hg_cy_TE_hg
            ) |> unique()
        )
    ) |>
    dplyr::pull(cy)

Correlation heatmap

matrix_corr <- cbind(
    calc_cpm(
        matrix_readcount_blastoid_pseudobulk
    )[features_hg_cy_EPI_HYP_TE_hg, ],
    matrix_cpm_PRJNA301445_cy_pseudobulk_lineage[
        features_hg_cy_EPI_HYP_TE_cy,
    ]
)

matrix_heatmap_corr_EPI_HYP_TE <- cor(log10(matrix_corr + 1) |> as.matrix())

matrix_heatmap_corr_EPI_HYP_TE <- matrix_heatmap_corr_EPI_HYP_TE[
    c("11", "18", "0", "1", "8", "9", "12", "17"),
    c(
        "ICM", "Pre-EPI", "PostE-EPI", "PostL-EPI",
        "Gast1", "Gast2a", "Gast2b",
        "Hypoblast", "PreE-TE", "PreL-TE", "Post-paTE"
    )
]
colnames(matrix_heatmap_corr_EPI_HYP_TE) <- paste(
    rep(
        c("EPI", "HYP", "TE"),
        times = c(7, 1, 3)
    ),
    colnames(matrix_heatmap_corr_EPI_HYP_TE),
    sep = ":"
)


Preview correlations.

Code
matrix_heatmap_corr_EPI_HYP_TE |>
    as.data.frame() |>
    tibble::rownames_to_column(var = "cluster") |>
    gt::gt() |>
    gt::fmt_number(columns = c(`EPI:ICM`:`TE:Post-paTE`), decimals = 2) |>
    gt::data_color(
        columns = c(`EPI:ICM`:`TE:Post-paTE`),
        colors = scales::col_numeric(
            c("#63BE7B", "#FFEB84", "#F87274"),
            domain = range(matrix_heatmap_corr_EPI_HYP_TE)
        )
    ) |>
    gt::cols_align(
        columns = dplyr::everything(),
        align = c("center")
    ) |>
    gt::cols_width(
        dplyr::everything() ~ gt::px(60)
    )
cluster EPI:ICM EPI:Pre-EPI EPI:PostE-EPI EPI:PostL-EPI EPI:Gast1 EPI:Gast2a EPI:Gast2b HYP:Hypoblast TE:PreE-TE TE:PreL-TE TE:Post-paTE
11 0.41 0.55 0.53 0.45 0.43 0.33 0.31 0.22 0.35 0.28 0.11
18 0.44 0.32 0.19 0.23 0.27 0.31 0.34 0.62 0.42 0.45 0.28
0 0.32 0.28 0.17 0.18 0.20 0.13 0.20 0.28 0.42 0.52 0.65
1 0.42 0.39 0.28 0.28 0.30 0.23 0.28 0.35 0.48 0.56 0.54
8 0.38 0.38 0.33 0.36 0.38 0.31 0.36 0.38 0.46 0.56 0.60
9 0.21 0.16 0.06 0.06 0.09 0.01 0.09 0.15 0.29 0.41 0.58
12 0.37 0.40 0.33 0.32 0.34 0.26 0.30 0.33 0.40 0.47 0.43
17 0.31 0.31 0.24 0.24 0.26 0.17 0.24 0.26 0.38 0.48 0.56
ha_group <- stringr::str_remove(
    string = colnames(matrix_heatmap_corr_EPI_HYP_TE),
    pattern = ":.+$"
)

ha_column <- ComplexHeatmap::HeatmapAnnotation(
    lineage = ComplexHeatmap::anno_simple(
        colnames(matrix_heatmap_corr_EPI_HYP_TE),
        # pch = anno_labels_cluster,
        col = setNames(
            object = scales::hue_pal()(n = ncol(matrix_heatmap_corr_EPI_HYP_TE)),
            nm = colnames(matrix_heatmap_corr_EPI_HYP_TE)
        ),
        which = "column",
        pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    show_annotation_name = TRUE,
    annotation_label = c(
        "Lineage"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    annotation_name_side = "left"
)

ha_left <- ComplexHeatmap::HeatmapAnnotation(
    cluster = ComplexHeatmap::anno_simple(
        rownames(matrix_heatmap_corr_EPI_HYP_TE),
        col = color_palette_cluster,
        which = "row",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    #
    lineage = ComplexHeatmap::anno_simple(
        rownames(matrix_heatmap_corr_EPI_HYP_TE) |>
            tibble::enframe() |>
            dplyr::mutate(
                color = case_when(
                    value %in% c(11) ~ "EPI",
                    value %in% c(18) ~ "HYP",
                    value %in% c(0, 1, 8, 9, 12, 17) ~ "TE",
                    value %in% c(10, 14) ~ "Pre-lineage",
                    TRUE ~ "Blastoid"
                )
            ) |>
            dplyr::pull(color),
        col = setNames(
            object = c("grey70", "#8ace7e", "#ff684c", "#9467bd", "#ffd8b1"),
            nm = c("Blastoid", "EPI", "HYP", "TE", "Pre-lineage")
        ),
        which = "row",
        # pt_size = unit(2, "mm"),
        pt_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        simple_anno_size = unit(2, "mm")
    ),
    # ,
    which = "row",
    show_annotation_name = FALSE,
    annotation_label = c(
        "Cluster",
        "Lineage"
    ),
    annotation_name_gp = grid::gpar(fontfamily = "Arial", fontsize = 6)
)

colors_ <- as.character(
    wesanderson::wes_palette("Zissou1", 10, type = "continuous")
)

col_fun <- circlize::colorRamp2(
    seq(0, 0.6, 0.1),
    # viridis::plasma(11)
    # wesanderson::wes_palette("Zissou1", 51, type = "continuous")
    colors_[c(1, 2, 3, 5, 7, 9, 10)]
)

ht <- ComplexHeatmap::Heatmap(
    matrix_heatmap_corr_EPI_HYP_TE,
    # name = "heatmap",
    # col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    col = col_fun,
    border = NA,
    rect_gp = grid::gpar(col = "white", lwd = 1),
    cell_fun = function(j, i, x, y, width, height, fill) {
        # grid::grid.text(
        #     sprintf("%.2f", corr_heatmap[i, j]), x, y,
        #     gp = grid::gpar(fontsize = 6)
        # )
        grid::grid.text(
            round(matrix_heatmap_corr_EPI_HYP_TE[i, j], 2),
            x, y,
            gp = grid::gpar(fontsize = 5)
        )
    },
    #
    cluster_rows = FALSE,
    cluster_columns = FALSE,
    #
    # row_labels = NA,
    row_names_side = c("left"),
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    show_row_names = TRUE,
    #
    column_labels = stringr::str_remove(
        string = colnames(matrix_heatmap_corr_EPI_HYP_TE), pattern = "^.+:"
    ),
    column_names_side = c("top"),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_names_rot = -90,
    show_column_names = TRUE,
    #
    show_heatmap_legend = FALSE,
    #
    show_row_dend = FALSE,
    show_column_dend = FALSE,
    #
    top_annotation = ha_column,
    left_annotation = ha_left,
    #
    heatmap_legend_param = list(
        title = "Corr",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(15, "mm"),
        legend_width = unit(5, "mm")
    ),
    #
    column_split = ha_group,
    column_gap = unit(1.5, "mm"),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = -90,
    column_title = NULL
)
# legend
lgd_species <- ComplexHeatmap::Legend(
    title = "Species",
    labels = c("Monkey", "Human"),
    #
    grid_height = unit(2.5, "mm"), grid_width = unit(2.5, "mm"),
    #
    legend_gp = grid::gpar(
        fill = c("Cynomolgus Monkey" = "#3D79F3FF", "Human" = "#34A74BFF")
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 5
    ),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    )
)

lgd_colorbar <- ComplexHeatmap::Legend(
    col_fun = col_fun,
    title = "Corr",
    grid_height = unit(1, "mm"),
    grid_width = unit(2, "mm"),
    legend_height = unit(10, "mm"),
    legend_width = unit(2, "mm"),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 5
    )
)

lgd_lineage <- ComplexHeatmap::Legend(
    title = "Lineage",
    labels = c("ELCs", "HLCs", "TLCs"),
    #
    grid_height = unit(2.5, "mm"), grid_width = unit(2.5, "mm"),
    #
    legend_gp = grid::gpar(
        fill = c(
            "ELCs" = "#8ace7e",
            "HLCs" = "#ff684c",
            "TLCs" = "#9467bd"
        )
    ),
    labels_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 5
    ),
    title_gp = grid::gpar(
        fontfamily = "Arial",
        fontsize = 6
    )
)

pd <- ComplexHeatmap::packLegend(
    # lgd_cluster,
    lgd_colorbar,
    lgd_lineage,
    # lgd_species,
    gap = unit(8, "mm"),
    direction = "vertical"
)
ComplexHeatmap::draw(
    ht,
    heatmap_legend_list = list(pd)
)

Derived stem cells

Data loading

matrix_readcount_5iLA <- vroom::vroom(
    file = file.path(
        PROJECT_DIR,
        "raw/naive_human_PSC",
        "matrix",
        "xaa_Aligned.sortedByCoord.out_deduped_q10_gene_id_featureCounts.txt.gz"
    ),
    delim = "\t",
    col_names = TRUE,
    skip = 1
) |>
    dplyr::select(-c(2:6))

matrix_readcount_5iLA <- Matrix(
    data = as.matrix(matrix_readcount_5iLA[, -1]),
    dimnames = list(
        matrix_readcount_5iLA[, 1, drop = TRUE],
        c("5iLA-1", "5iLA-2")
    ),
    sparse = TRUE
)

Correlation heatmap

matrix_corr <- cbind(
    matrix_readcount_stem_pseudobulk[, c("LW49", "LW50", "LW51", "LW52")],
    "5iLA" = Matrix::rowSums(matrix_readcount_5iLA),
    matrix_readcount_PRJEB7132_pseudobulk,
    matrix_readcount_PRJNA576801_pseudobulk_lineage,
    matrix_readcount_PRJNA638350_pseudobulk_lineage[
        ,
        !colnames(matrix_readcount_PRJNA638350_pseudobulk_lineage) %in% c(
            "FT190", "Hec116"
        )
    ],
    matrix_readcount_PRJNA574150_pseudobulk_lineage[, "nEnd", drop = FALSE]
)

colnames(matrix_corr) <- c(
    "WIBR3-5i/L/A, scRNA-Seq",
    "Blastoid naïve ES cells",
    "Blastoid nEND",
    "Blastoid TSCs",
    #
    "WIBR3-5i/L/A, bulk",
    #
    "PRJEB7132:H9 Primed",
    "PRJEB7132:H9 Reset",
    #
    "PRJNA576801:Naïve ES cells",
    "PRJNA576801:TSCs",
    "PRJNA576801:Primed ES cells",
    #
    "PRJNA638350:Naïve ES cells",
    "PRJNA638350:Primed ES cells",
    "PRJNA638350:TSCs (TD)",
    "PRJNA638350:TSCs",
    #
    "PRJNA574150:nEND"
)

features_ <- c(
    features_hg_cy_EPI_hg,
    features_hg_cy_HYP_hg,
    features_hg_cy_TE_hg
) |> unique()
matrix_heatmap_corr <- cor(
    log10(calc_cpm(matrix_corr)[features_, ] + 1) |>
        as.matrix()
)
column_labels <- colnames(matrix_heatmap_corr) |>
    tibble::enframe(value = "sample_name") |>
    dplyr::mutate(
        a = stringr::str_remove(string = sample_name, pattern = ":.+$")
    ) |>
    dplyr::left_join(
        studies,
        by = c("a" = "bioproject")
    ) |>
    dplyr::select(sample_name, a, description) |>
    dplyr::mutate(
        label = dplyr::case_when(
            is.na(description) ~ sample_name,
            TRUE ~ paste(sample_name, description, sep = "; ")
        ),
        label = stringr::str_remove(
            string = label,
            pattern = ".+:"
        )
    ) |>
    dplyr::pull(label)
col_fun <- circlize::colorRamp2(
    seq(0, 1, 0.2),
    # viridis::plasma(11)
    # wesanderson::wes_palette("Zissou1", 51, type = "continuous")
    #
    as.character(
        wesanderson::wes_palette("Zissou1", 10, type = "continuous")
    )[c(1, 3, 5, 7, 9, 10)]
)

ht <- ComplexHeatmap::Heatmap(
    matrix_heatmap_corr,
    # name = "heatmap",
    # col = wesanderson::wes_palette("Zissou1", 50, type = "continuous"),
    col = col_fun,
    border = NA,
    rect_gp = grid::gpar(col = "white", lwd = 1),
    cell_fun = function(j, i, x, y, width, height, fill) {
        grid::grid.text(
            round(matrix_heatmap_corr[i, j], 2),
            x, y,
            gp = grid::gpar(fontsize = 5)
        )
    },
    #
    cluster_rows = FALSE,
    cluster_columns = FALSE,
    #
    row_labels = column_labels,
    row_names_side = c("left"),
    row_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    show_row_names = TRUE,
    #
    column_labels = column_labels,
    column_names_side = c("top"),
    column_names_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    # column_names_rot = -90,
    column_names_rot = -45,
    show_column_names = TRUE,
    #
    show_heatmap_legend = FALSE,
    #
    show_row_dend = FALSE,
    show_column_dend = FALSE,
    #
    # top_annotation = ha_column,
    # left_annotation = ha_left,
    #
    heatmap_legend_param = list(
        title = "Corr",
        title_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 6
        ),
        legend_direction = "vertical",
        labels_gp = grid::gpar(
            fontfamily = "Arial",
            fontsize = 5
        ),
        legend_height = unit(15, "mm"),
        legend_width = unit(5, "mm")
    ),
    #
    # column_split = ha_group,
    column_gap = unit(1.5, "mm"),
    column_title_gp = grid::gpar(fontfamily = "Arial", fontsize = 6),
    column_title_rot = -90,
    column_title = NULL
)
ComplexHeatmap::draw(ht)

Correlation analysis of blastoid naïve ES cells, blastoid nEND and blastoid TSCs compared to published transcriptomes


R session info

devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
 setting  value
 version  R version 4.2.1 (2022-06-23)
 os       macOS Monterey 12.6
 system   aarch64, darwin21.6.0
 ui       unknown
 language (EN)
 collate  en_US.UTF-8
 ctype    en_US.UTF-8
 tz       America/Chicago
 date     2022-09-25
 pandoc   2.19.2 @ /opt/homebrew/bin/ (via rmarkdown)

─ Packages ───────────────────────────────────────────────────────────────────
 package        * version       date (UTC) lib source
 BiocGenerics     0.42.0        2022-04-26 [1] Bioconductor
 bit              4.0.4         2020-08-04 [1] CRAN (R 4.2.0)
 bit64            4.0.5         2020-08-30 [1] CRAN (R 4.2.0)
 cachem           1.0.6         2021-08-19 [1] CRAN (R 4.2.0)
 Cairo            1.6-0         2022-07-05 [1] CRAN (R 4.2.1)
 callr            3.7.2         2022-08-22 [1] CRAN (R 4.2.1)
 cellranger       1.1.0         2016-07-27 [1] CRAN (R 4.2.0)
 circlize         0.4.15        2022-05-10 [1] CRAN (R 4.2.0)
 cli              3.4.1         2022-09-23 [1] CRAN (R 4.2.1)
 clue             0.3-61        2022-05-30 [1] CRAN (R 4.2.0)
 cluster          2.1.4         2022-08-22 [2] CRAN (R 4.2.1)
 codetools        0.2-18        2020-11-04 [2] CRAN (R 4.2.1)
 colorspace       2.0-3         2022-02-21 [1] CRAN (R 4.2.0)
 ComplexHeatmap   2.12.1        2022-08-09 [1] Bioconductor
 crayon           1.5.1         2022-03-26 [1] CRAN (R 4.2.0)
 devtools         2.4.4.9000    2022-09-23 [1] Github (r-lib/devtools@9e2793a)
 digest           0.6.29        2021-12-01 [1] CRAN (R 4.2.0)
 doParallel       1.0.17        2022-02-07 [1] CRAN (R 4.2.0)
 dplyr          * 1.0.99.9000   2022-09-23 [1] Github (tidyverse/dplyr@19c2be3)
 ellipsis         0.3.2         2021-04-29 [1] CRAN (R 4.2.0)
 evaluate         0.16          2022-08-09 [1] CRAN (R 4.2.1)
 extrafont      * 0.18          2022-04-12 [1] CRAN (R 4.2.0)
 extrafontdb      1.0           2012-06-11 [1] CRAN (R 4.2.0)
 fansi            1.0.3         2022-03-24 [1] CRAN (R 4.2.0)
 farver           2.1.1         2022-07-06 [1] CRAN (R 4.2.1)
 fastmap          1.1.0         2021-01-25 [1] CRAN (R 4.2.0)
 forcats        * 0.5.2.9000    2022-08-20 [1] Github (tidyverse/forcats@bd319e0)
 foreach          1.5.2         2022-02-02 [1] CRAN (R 4.2.0)
 fs               1.5.2.9000    2022-08-24 [1] Github (r-lib/fs@238032f)
 generics         0.1.3         2022-07-05 [1] CRAN (R 4.2.1)
 GetoptLong       1.0.5         2020-12-15 [1] CRAN (R 4.2.0)
 ggplot2        * 3.3.6.9000    2022-09-12 [1] Github (tidyverse/ggplot2@a58b48c)
 ggrepel          0.9.1         2021-01-15 [1] CRAN (R 4.2.0)
 GlobalOptions    0.1.2         2020-06-10 [1] CRAN (R 4.2.0)
 glue             1.6.2.9000    2022-04-22 [1] Github (tidyverse/glue@d47d6c7)
 gt               0.7.0.9000    2022-09-23 [1] Github (rstudio/gt@4030fb7)
 gtable           0.3.1.9000    2022-09-01 [1] Github (r-lib/gtable@c1a7a81)
 highr            0.9           2021-04-16 [1] CRAN (R 4.2.0)
 hms              1.1.2         2022-08-19 [1] CRAN (R 4.2.1)
 htmltools        0.5.3         2022-07-18 [1] CRAN (R 4.2.1)
 htmlwidgets      1.5.4         2022-08-23 [1] Github (ramnathv/htmlwidgets@400cf1a)
 httpuv           1.6.6         2022-09-08 [1] CRAN (R 4.2.1)
 IRanges          2.30.1        2022-08-18 [1] Bioconductor
 iterators        1.0.14        2022-02-05 [1] CRAN (R 4.2.0)
 jsonlite         1.8.0         2022-02-22 [1] CRAN (R 4.2.0)
 knitr            1.40          2022-08-24 [1] CRAN (R 4.2.1)
 labeling         0.4.2         2020-10-20 [1] CRAN (R 4.2.0)
 later            1.3.0         2021-08-18 [1] CRAN (R 4.2.0)
 lattice          0.20-45       2021-09-22 [2] CRAN (R 4.2.1)
 lifecycle        1.0.2.9000    2022-09-23 [1] Github (r-lib/lifecycle@0a6860a)
 lubridate      * 1.8.0.9000    2022-05-24 [1] Github (tidyverse/lubridate@0bb49b2)
 magick           2.7.3         2021-08-18 [1] CRAN (R 4.2.0)
 magrittr         2.0.3         2022-03-30 [1] CRAN (R 4.2.0)
 MASS             7.3-58.1      2022-08-03 [2] CRAN (R 4.2.1)
 Matrix         * 1.5-1         2022-09-13 [1] CRAN (R 4.2.1)
 matrixStats      0.62.0        2022-04-19 [1] CRAN (R 4.2.0)
 memoise          2.0.1         2021-11-26 [1] CRAN (R 4.2.0)
 mime             0.12          2021-09-28 [1] CRAN (R 4.2.0)
 miniUI           0.1.1.1       2018-05-18 [1] CRAN (R 4.2.0)
 munsell          0.5.0         2018-06-12 [1] CRAN (R 4.2.0)
 patchwork      * 1.1.2.9000    2022-08-20 [1] Github (thomasp85/patchwork@c14c960)
 pillar           1.8.1         2022-08-19 [1] CRAN (R 4.2.1)
 pkgbuild         1.3.1         2021-12-20 [1] CRAN (R 4.2.0)
 pkgconfig        2.0.3         2019-09-22 [1] CRAN (R 4.2.0)
 pkgload          1.3.0         2022-06-27 [1] CRAN (R 4.2.1)
 png              0.1-7         2013-12-03 [1] CRAN (R 4.2.0)
 prettyunits      1.1.1.9000    2022-04-22 [1] Github (r-lib/prettyunits@8706d89)
 processx         3.7.0         2022-07-07 [1] CRAN (R 4.2.1)
 profvis          0.3.7         2020-11-02 [1] CRAN (R 4.2.0)
 promises         1.2.0.1       2021-02-11 [1] CRAN (R 4.2.0)
 ps               1.7.1         2022-06-18 [1] CRAN (R 4.2.0)
 purrr          * 0.9000.0.9000 2022-09-24 [1] Github (tidyverse/purrr@4ab13f5)
 R.cache          0.16.0        2022-07-21 [1] CRAN (R 4.2.1)
 R.methodsS3      1.8.2         2022-06-13 [1] CRAN (R 4.2.0)
 R.oo             1.25.0        2022-06-12 [1] CRAN (R 4.2.0)
 R.utils          2.12.0        2022-06-28 [1] CRAN (R 4.2.1)
 R6               2.5.1.9000    2022-08-04 [1] Github (r-lib/R6@87d5e45)
 ragg             1.2.2.9000    2022-09-12 [1] Github (r-lib/ragg@904e145)
 RColorBrewer     1.1-3         2022-04-03 [1] CRAN (R 4.2.0)
 Rcpp             1.0.9         2022-07-08 [1] CRAN (R 4.2.1)
 readr          * 2.1.2.9000    2022-09-20 [1] Github (tidyverse/readr@5cac6ed)
 readxl           1.4.1.9000    2022-09-15 [1] Github (tidyverse/readxl@1835c96)
 remotes          2.4.2         2022-09-12 [1] Github (r-lib/remotes@bc0949d)
 reticulate       1.26          2022-08-31 [1] CRAN (R 4.2.1)
 rjson            0.2.21        2022-01-09 [1] CRAN (R 4.2.0)
 rlang            1.0.6         2022-09-24 [1] Github (r-lib/rlang@66454bd)
 rmarkdown        2.16.1        2022-09-24 [1] Github (rstudio/rmarkdown@9577707)
 Rttf2pt1         1.3.10        2022-02-07 [1] CRAN (R 4.2.0)
 S4Vectors        0.34.0        2022-04-26 [1] Bioconductor
 sass             0.4.2         2022-07-16 [1] CRAN (R 4.2.1)
 scales           1.2.1.9000    2022-08-20 [1] Github (r-lib/scales@b3df2fb)
 sessioninfo      1.2.2         2021-12-06 [1] CRAN (R 4.2.0)
 shape            1.4.6         2021-05-19 [1] CRAN (R 4.2.0)
 shiny            1.7.2         2022-07-19 [1] CRAN (R 4.2.1)
 stringi          1.7.8         2022-07-11 [1] CRAN (R 4.2.1)
 stringr        * 1.4.1.9000    2022-08-21 [1] Github (tidyverse/stringr@792bc92)
 styler         * 1.7.0.9002    2022-09-21 [1] Github (r-lib/styler@1f4437b)
 systemfonts      1.0.4         2022-02-11 [1] CRAN (R 4.2.0)
 textshaping      0.3.6         2021-10-13 [1] CRAN (R 4.2.0)
 tibble         * 3.1.8.9002    2022-09-24 [1] Github (tidyverse/tibble@e9db4f4)
 tidyr          * 1.2.1.9000    2022-09-09 [1] Github (tidyverse/tidyr@653def2)
 tidyselect       1.1.2.9000    2022-09-21 [1] Github (r-lib/tidyselect@edd0a3b)
 tidyverse      * 1.3.2.9000    2022-09-12 [1] Github (tidyverse/tidyverse@3be8283)
 tzdb             0.3.0         2022-03-28 [1] CRAN (R 4.2.0)
 urlchecker       1.0.1         2021-11-30 [1] CRAN (R 4.2.0)
 usethis          2.1.6.9000    2022-09-23 [1] Github (r-lib/usethis@8ecb7ab)
 utf8             1.2.2         2021-07-24 [1] CRAN (R 4.2.0)
 vctrs            0.4.1.9000    2022-09-19 [1] Github (r-lib/vctrs@0a219ba)
 vroom            1.5.7.9000    2022-09-09 [1] Github (r-lib/vroom@0c2423e)
 wesanderson      0.3.6.9000    2022-04-22 [1] Github (karthik/wesanderson@2796b59)
 withr            2.5.0         2022-03-03 [1] CRAN (R 4.2.0)
 xfun             0.33          2022-09-12 [1] CRAN (R 4.2.1)
 xtable           1.8-4         2019-04-21 [1] CRAN (R 4.2.0)
 yaml             2.3.5         2022-02-21 [1] CRAN (R 4.2.0)

 [1] /opt/homebrew/lib/R/4.2/site-library
 [2] /opt/homebrew/Cellar/r/4.2.1_4/lib/R/library

─ Python configuration ───────────────────────────────────────────────────────
 python:         /Users/jialei/.pyenv/shims/python
 libpython:      /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/libpython3.9.dylib
 pythonhome:     /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10:/Users/jialei/.pyenv/versions/mambaforge-4.10.3-10
 version:        3.9.13 | packaged by conda-forge | (main, May 27 2022, 17:00:33)  [Clang 13.0.1 ]
 numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy
 numpy_version:  1.22.4
 numpy:          /Users/jialei/.pyenv/versions/mambaforge-4.10.3-10/lib/python3.9/site-packages/numpy
 
 NOTE: Python version was forced by RETICULATE_PYTHON

──────────────────────────────────────────────────────────────────────────────


Citation

BibTeX citation:
@article{yu,
  author = {Leqian Yu and Yulei Wei and Jialei Duan and Daniel A.
    Schmitz and Masahiro Sakurai and Lei Wang and Kunhua Wang and Shuhua
    Zhao and Gary C. Hon and Jun Wu},
  editor = {},
  publisher = {Nature Publishing Group},
  title = {Blastocyst-Like Structures Generated from Human Pluripotent
    Stem Cells},
  journal = {Nature},
  volume = {591},
  number = {7851},
  pages = {620 - 626},
  date = {},
  url = {https://doi.org/10.1038/s41586-021-03356-y},
  doi = {10.1038/s41586-021-03356-y},
  langid = {en},
  abstract = {Human blastoids provide a readily accessible, scalable,
    versatile and perturbable alternative to blastocysts for studying
    early human development, understanding early pregnancy loss and
    gaining insights into early developmental defects.}
}
For attribution, please cite this work as:
Leqian Yu, Yulei Wei, Jialei Duan, Daniel A. Schmitz, Masahiro Sakurai, Lei Wang, Kunhua Wang, Shuhua Zhao, Gary C. Hon, and Jun Wu. n.d. “Blastocyst-Like Structures Generated from Human Pluripotent Stem Cells.” Nature 591 (7851): 620–26. https://doi.org/10.1038/s41586-021-03356-y.