Sys.time()
[1] "2023-09-10 03:31:48 CDT"
Sys.time()
[1] "2023-09-10 03:31:48 CDT"
[1] "America/Chicago"
# Root directory of the project; the dataset paths further down are built
# from it with file.path().
# NOTE(review): absolute, user-specific path -- it will not resolve on another
# machine without editing.
PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation"
Load required packages.
library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0.9000 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4.9000
## ✔ forcats 1.0.0.9000 ✔ stringr 1.5.0.9000
## ✔ ggplot2 3.4.3.9000 ✔ tibble 3.2.1.9005
## ✔ lubridate 1.9.2.9000 ✔ tidyr 1.3.0.9000
## ✔ purrr 1.0.2.9000
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Matrix)
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(patchwork)
library(extrafont)
## Registering fonts with R
library(furrr)
## Loading required package: future
#' Render a summary data frame as a formatted gt table
#'
#' @param x A data frame containing (at least) the numeric columns
#'   `num_cells` and `median_umis`; any other columns are displayed as-is.
#' @return A gt table object in which `median_umis` is shaded on a
#'   green-orange-red gradient, `num_cells` and `median_umis` are formatted
#'   with "," as the thousands separator (0 and 1 decimals respectively), and
#'   a grand-summary row labelled "Sum" holds the total of `num_cells`.
create_gt <- function(x) {
  x |>
    gt::gt() |>
    gt::data_color(
      columns = c(median_umis),
      fn = scales::col_numeric(
        palette = c("green", "orange", "red"),
        # No fixed domain: the colour scale is fitted to the values it is given.
        domain = NULL
      )
    ) |>
    gt::fmt_number(
      columns = c(num_cells),
      sep_mark = ",",
      decimals = 0,
      suffixing = FALSE
    ) |>
    gt::fmt_number(
      columns = c(median_umis),
      sep_mark = ",",
      decimals = 1,
      suffixing = FALSE
    ) |>
    gt::grand_summary_rows(
      columns = c(num_cells),
      fns = list(Sum = ~ sum(.)),
      # `suffixing` is a fmt_number() option, so it is set inside the
      # formatter rather than being handed to grand_summary_rows(), which has
      # no such parameter and would only collect it in `...`.
      fmt = ~ gt::fmt_number(
        .,
        decimals = 0,
        use_seps = TRUE,
        suffixing = FALSE
      )
    )
}
# Print the Python configuration reticulate is bound to (interpreter path,
# libpython, numpy), so the rendered report records which environment was used.
reticulate::py_config()
python: /Users/jialei/.pyenv/shims/python
libpython: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/libpython3.10.dylib
pythonhome: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3:/Users/jialei/.pyenv/versions/mambaforge-22.9.0-3
version: 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:26:08) [Clang 14.0.6 ]
numpy: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
numpy_version: 1.24.3
anndata: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/anndata
NOTE: Python version was forced by RETICULATE_PYTHON
# Petropoulos et al. 2016; PRJEB11202
# Zhou et al. 2019; PRJNA431392
# Xiang et al. 2020; PRJNA562548
# Tyser et al. 2021; PRJEB40781
# Zheng et al. 2019; PRJNA555602
# Yanagida et al. 2021; PRJNA720968
# Build one adata.h5ad path per project accession (kept as a list, in the
# order the accessions are given), drop duplicate paths, then report which of
# the files are present on disk.
adata_files <- c(
  "PRJEB11202", "PRJNA431392", "PRJNA562548",
  "PRJEB40781", "PRJNA555602", "PRJNA720968"
) |>
  purrr::map(\(accession) {
    file.path(PROJECT_DIR, "raw", "public", accession, "matrix", "adata.h5ad")
  }) |>
  unique()
purrr::map_lgl(adata_files, file.exists)
[1] TRUE TRUE TRUE TRUE TRUE TRUE
# Read every h5ad file with `backed = NULL`, pull its matrix out with
# extract_matrix_from_adata(), and column-bind the per-dataset matrices.
# NOTE(review): cbind() presumes every dataset has the same rows in the same
# order -- confirm against extract_matrix_from_adata().
BACKED <- NULL
matrix_readcount_use <- adata_files |>
  purrr::map(\(adata_path) {
    # Echo the path so progress is visible in the rendered output.
    cat(adata_path, "\n")
    adata <- ad$read_h5ad(filename = adata_path, backed = BACKED)
    extract_matrix_from_adata(adata)
  }) |>
  purrr::reduce(cbind)
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/public/PRJEB11202/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/public/PRJNA431392/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/public/PRJNA562548/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/public/PRJEB40781/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/public/PRJNA555602/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/public/PRJNA720968/matrix/adata.h5ad
# Put the columns in sorted order of their names.
# Index by position (order()) rather than by name: name-based indexing
# resolves each name to its first match, so duplicated column names would
# silently repeat one column and drop the others. `drop = FALSE` keeps the
# result two-dimensional even if only one column is present.
matrix_readcount_use <- matrix_readcount_use[
  , order(colnames(matrix_readcount_use)),
  drop = FALSE
]
dim(matrix_readcount_use)
[1] 33538 19651
# Collect the `obs` table of every h5ad file (opened with `backed = "r"`),
# move its row names into a leading `cell` column, and stack all datasets
# into one data frame.
# NOTE(review): the objects opened in backed mode are never closed explicitly
# here -- confirm whether that matters for `ad`.
BACKED <- "r"
cell_metadata <- adata_files |>
  purrr::map(\(adata_path) {
    obs <- ad$read_h5ad(filename = adata_path, backed = BACKED)$obs
    obs |>
      tibble::rownames_to_column(var = "cell") |>
      dplyr::relocate(cell)
  }) |>
  dplyr::bind_rows()
head(cell_metadata)
cell batch num_umis num_features mt_percentage
1 ERS1079290 PRJEB11202 1884175 10506 0.006999350
2 ERS1079291 PRJEB11202 1290557 8956 0.009932920
3 ERS1079292 PRJEB11202 1686855 10893 0.007484935
4 ERS1079293 PRJEB11202 1181562 9858 0.010599528
5 ERS1079294 PRJEB11202 1868361 10619 0.007091777
6 ERS1079295 PRJEB11202 1331354 8659 0.009590988
# Petropoulos et al. 2016 (PRJEB11202): per-cell lineage and developmental
# stage, reduced to the columns `cell`, `lineage`, `developmental_stage`.
cell_metadata_PRJEB11202 <- file.path(
  PROJECT_DIR,
  "raw",
  "public",
  "PRJEB11202",
  "matrix/cell_metadata.csv"
) |>
  vroom::vroom(show_col_types = FALSE) |>
  dplyr::mutate(
    # The stage is the part of `individual` in front of the first ".".
    developmental_stage = stringr::str_remove(
      string = individual,
      pattern = "\\..+$"
    )
  ) |>
  dplyr::mutate(
    # Natural (numeric-aware) ordering of the stage labels.
    developmental_stage = factor(
      developmental_stage,
      levels = stringr::str_sort(unique(developmental_stage), numeric = TRUE)
    ),
    # Fixed display order; values outside these levels become NA.
    lineage = factor(
      inferred_lineage,
      levels = c(
        "epiblast",
        "primitive_endoderm",
        "trophectoderm",
        "not_applicable"
      )
    )
  ) |>
  dplyr::select(cell, lineage, developmental_stage)
lineage | num_cells | median_umis | |
---|---|---|---|
epiblast | 127 | 1,501,176.0 | |
primitive_endoderm | 108 | 1,626,576.5 | |
trophectoderm | 861 | 1,528,414.0 | |
not_applicable | 433 | 1,574,032.0 | |
Sum | — | 1,529 | — |
developmental_stage | num_cells | median_umis | |
---|---|---|---|
E3 | 81 | 1,560,759.0 | |
E4 | 190 | 1,737,179.5 | |
E5 | 377 | 1,320,641.0 | |
E6 | 415 | 1,303,893.0 | |
E7 | 466 | 1,826,628.0 | |
Sum | — | 1,529 | — |
# Xiang et al. 2020 (PRJNA562548): keep the sample name, age and cell type
# under the shared column names `cell`, `developmental_stage`, `lineage`.
cell_metadata_PRJNA562548 <- file.path(
  PROJECT_DIR,
  "raw",
  "public",
  "PRJNA562548",
  "matrix/cell_metadata.csv"
) |>
  vroom::vroom(show_col_types = FALSE) |>
  dplyr::select(
    cell = `Sample Name`,
    developmental_stage = Age,
    lineage = Cell_type
  ) |>
  dplyr::mutate(
    # Shorten the "embryo invitro day " prefix to "E".
    developmental_stage = stringr::str_replace(
      string = developmental_stage,
      pattern = "embryo invitro day ",
      replacement = "E"
    )
  ) |>
  dplyr::mutate(
    # Natural (numeric-aware) ordering of the stage labels.
    developmental_stage = factor(
      developmental_stage,
      levels = stringr::str_sort(
        x = unique(developmental_stage),
        numeric = TRUE
      )
    ),
    # Fixed display order; values outside these levels become NA.
    lineage = factor(
      lineage,
      levels = c("ICM", "EPI", "PSA-EPI", "Hypoblast", "CTBs", "STBs", "EVTs")
    )
  )
lineage | num_cells | median_umis | |
---|---|---|---|
ICM | 52 | 9,301,780.5 | |
EPI | 126 | 8,140,892.0 | |
PSA-EPI | 44 | 10,626,505.5 | |
Hypoblast | 25 | 10,352,781.0 | |
CTBs | 159 | 11,932,826.0 | |
STBs | 109 | 10,947,518.0 | |
EVTs | 40 | 7,776,425.0 | |
Sum | — | 555 | — |
developmental_stage | num_cells | median_umis | |
---|---|---|---|
E6 | 63 | 8,417,642.0 | |
E7 | 65 | 10,379,784.0 | |
E8 | 59 | 12,844,992.0 | |
E9 | 68 | 11,678,631.0 | |
E10 | 77 | 10,266,959.0 | |
E12 | 88 | 9,614,121.5 | |
E13.5 | 15 | 12,687,897.0 | |
E14 | 120 | 8,247,721.5 | |
Sum | — | 555 | — |
lineage | num_cells | median_umis | |
---|---|---|---|
advanced mesoderm | 164 | 993,741.0 | |
axial mesoderm | 23 | 853,681.0 | |
ectodermal cell | 29 | 996,476.0 | |
emergent mesoderm | 185 | 911,736.0 | |
endodermal cell | 135 | 989,383.0 | |
epiblast cell | 133 | 790,584.0 | |
erythrocyte | 32 | 716,602.0 | |
hemogenic endothelial progenitor | 111 | 925,287.0 | |
nascent mesoderm | 98 | 787,231.5 | |
primitive streak | 202 | 826,282.5 | |
yolk sac mesoderm | 83 | 1,124,901.0 | |
Sum | — | 1,195 | — |
# Zheng et al. 2019 (PRJNA555602): derive `batch` from `old_ident`, trim the
# cell identifiers, and label the `rna_snn_res_0.3` clusters with cell types.
cell_metadata_PRJNA555602 <- file.path(
  PROJECT_DIR,
  "raw",
  "public",
  "PRJNA555602",
  "matrix/cell_metadata.csv"
) |>
  vroom::vroom(show_col_types = FALSE) |>
  dplyr::mutate(
    # Rows with any other `old_ident` get NA.
    batch = dplyr::case_when(
      old_ident == "10X_Embryoid" ~ "GSM3956280",
      old_ident == "10X_H9_Amnion" ~ "GSM3956281"
    ),
    # Remove the first "." and everything after it.
    cell = stringr::str_remove(string = cell, pattern = "\\..+")
    # cell = paste(batch, cell, sep = "_"),
  ) |>
  dplyr::mutate(
    # Cluster number -> cell type; unlisted clusters become NA.
    cell_type = dplyr::case_when(
      rna_snn_res_0.3 %in% 0 ~ "Transwell-AMLC",
      rna_snn_res_0.3 %in% 1 ~ "MeLC2",
      rna_snn_res_0.3 %in% 2 ~ "Human_ES_cell",
      rna_snn_res_0.3 %in% 3 ~ "MeLC1",
      rna_snn_res_0.3 %in% 4 ~ "hPGCLC",
      rna_snn_res_0.3 %in% 5 ~ "AMLC"
    )
  ) |>
  dplyr::mutate(
    # Fixed display order of the cell types.
    cell_type = factor(
      cell_type,
      levels = c(
        "Human_ES_cell",
        "hPGCLC", # human PGC-like cells
        "Transwell-AMLC",
        "AMLC", # amniotic ectoderm-like cells
        "MeLC1", # mesoderm-like cell
        "MeLC2"
      )
    )
  )
cell_type | num_cells | median_umis | |
---|---|---|---|
Human_ES_cell | 1,677 | 28,735.0 | |
hPGCLC | 1,224 | 21,419.0 | |
Transwell-AMLC | 2,845 | 27,137.0 | |
AMLC | 979 | 22,686.0 | |
MeLC1 | 1,405 | 23,269.0 | |
MeLC2 | 1,836 | 25,066.5 | |
Sum | — | 9,966 | — |
# Yanagida et al. 2021 (PRJNA720968): order `origin` and
# `developmental_stage`, and prefix each lineage with its origin.
cell_metadata_PRJNA720968 <- file.path(
  PROJECT_DIR,
  "raw/public",
  "PRJNA720968",
  "matrix",
  "cell_metadata.csv"
) |>
  vroom::vroom(show_col_types = FALSE) |>
  dplyr::mutate(
    origin = factor(origin, levels = c("Blastocyst", "Blastoid")),
    # Natural (numeric-aware) ordering of the stage labels.
    developmental_stage = factor(
      developmental_stage,
      levels = stringr::str_sort(unique(developmental_stage), numeric = TRUE)
    )
  ) |>
  dplyr::mutate(
    # Combined after `origin` has been converted to a factor, so the label
    # uses the factor's value: "<origin>: <lineage>".
    lineage = paste(origin, lineage, sep = ": ")
  ) |>
  dplyr::mutate(
    # Fixed display order; combinations not listed here become NA.
    lineage = factor(
      lineage,
      levels = c(
        "Blastocyst: Epiblast",
        "Blastocyst: Hypoblast",
        "Blastocyst: Inner Cell Mass",
        "Blastocyst: Inner Cell Mass-Trophectoderm Transition",
        "Blastocyst: Early Trophectoderm",
        "Blastocyst: Trophectoderm",
        #
        "Blastoid: Epiblast",
        "Blastoid: Hypoblast",
        "Blastoid: Transitioning",
        "Blastoid: Trophectoderm"
      )
    )
  )
lineage | num_cells | median_umis | |
---|---|---|---|
Blastocyst: Epiblast | 31 | 8,126,430.0 | |
Blastocyst: Hypoblast | 14 | 9,776,420.5 | |
Blastocyst: Inner Cell Mass | 22 | 7,335,337.0 | |
Blastocyst: Inner Cell Mass-Trophectoderm Transition | 23 | 8,187,758.0 | |
Blastocyst: Early Trophectoderm | 18 | 8,361,591.5 | |
Blastocyst: Trophectoderm | 117 | 7,541,800.0 | |
Blastoid: Epiblast | 73 | 7,916,385.0 | |
Blastoid: Hypoblast | 13 | 8,166,701.0 | |
Blastoid: Transitioning | 7 | 4,276,121.0 | |
Blastoid: Trophectoderm | 174 | 7,467,628.5 | |
NA | 3 | 4,890,522.0 | |
Sum | — | 495 | — |
developmental_stage | num_cells | median_umis | |
---|---|---|---|
Day3 | 159 | 7,329,571.0 | |
Day4 | 108 | 7,685,250.0 | |
Day5 | 68 | 7,692,113.5 | |
Day6 | 80 | 6,475,820.5 | |
Day7 | 80 | 8,645,422.0 | |
Sum | — | 495 | — |
# Print the R session settings and the versions of the packages in use, so
# the rendered report documents the environment it was produced in.
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.3.1 (2023-06-16)
os macOS Ventura 13.5.2
system aarch64, darwin22.4.0
ui unknown
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Chicago
date 2023-09-10
pandoc 2.19.2 @ /Users/jialei/.pyenv/shims/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
bit 4.0.5 2022-11-15 [1] CRAN (R 4.3.0)
bit64 4.0.5 2020-08-30 [1] CRAN (R 4.3.0)
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0)
callr 3.7.3 2022-11-02 [1] CRAN (R 4.3.0)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.0)
codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.1)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.3.0)
devtools 2.4.5.9000 2023-08-11 [1] Github (r-lib/devtools@163c3f2)
digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.1)
dplyr * 1.1.3 2023-09-03 [1] CRAN (R 4.3.1)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.0)
evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.0)
extrafont * 0.19 2023-01-18 [1] CRAN (R 4.3.0)
extrafontdb 1.0 2012-06-11 [1] CRAN (R 4.3.0)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.0)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.0)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0)
forcats * 1.0.0.9000 2023-04-23 [1] Github (tidyverse/forcats@4a8525a)
fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.1)
furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.3.1)
future * 1.33.0 2023-07-01 [1] CRAN (R 4.3.1)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
ggplot2 * 3.4.3.9000 2023-09-05 [1] Github (tidyverse/ggplot2@d180248)
globals 0.16.2 2022-11-21 [1] CRAN (R 4.3.0)
glue 1.6.2.9000 2023-04-23 [1] Github (tidyverse/glue@cbac82a)
gt 0.9.0.9000 2023-09-02 [1] Github (rstudio/gt@c73eece)
gtable 0.3.4.9000 2023-08-22 [1] Github (r-lib/gtable@c410a54)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
htmltools 0.5.6 2023-08-10 [1] CRAN (R 4.3.1)
htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.3.0)
jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.3.1)
knitr 1.43 2023-05-25 [1] CRAN (R 4.3.0)
lattice 0.21-8 2023-04-05 [2] CRAN (R 4.3.1)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.3.0)
listenv 0.9.0 2022-12-16 [1] CRAN (R 4.3.0)
lubridate * 1.9.2.9000 2023-07-22 [1] Github (tidyverse/lubridate@cae67ea)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
Matrix * 1.6-1 2023-08-14 [2] CRAN (R 4.3.1)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.0)
parallelly 1.36.0 2023-05-26 [1] CRAN (R 4.3.0)
patchwork * 1.1.3.9000 2023-08-17 [1] Github (thomasp85/patchwork@51a6eff)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
pkgbuild 1.4.2 2023-06-26 [1] CRAN (R 4.3.1)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
pkgload 1.3.2.9000 2023-07-05 [1] Github (r-lib/pkgload@3cf9896)
png 0.1-8 2022-11-29 [1] CRAN (R 4.3.0)
prettyunits 1.1.1.9000 2023-04-23 [1] Github (r-lib/prettyunits@8706d89)
processx 3.8.2 2023-06-30 [1] CRAN (R 4.3.1)
ps 1.7.5 2023-04-18 [1] CRAN (R 4.3.0)
purrr * 1.0.2.9000 2023-08-11 [1] Github (tidyverse/purrr@ac4f5a9)
R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.3.0)
R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.3.0)
R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.3.0)
R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.3.0)
R6 2.5.1.9000 2023-04-23 [1] Github (r-lib/R6@e97cca7)
Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.3.1)
readr * 2.1.4.9000 2023-08-03 [1] Github (tidyverse/readr@80e4dc1)
remotes 2.4.2.9000 2023-06-09 [1] Github (r-lib/remotes@8875171)
reticulate 1.31 2023-08-10 [1] CRAN (R 4.3.1)
rlang 1.1.1.9000 2023-06-09 [1] Github (r-lib/rlang@c55f602)
rmarkdown 2.24.2 2023-09-07 [1] Github (rstudio/rmarkdown@8d2d9b8)
rstudioapi 0.15.0.9000 2023-09-07 [1] Github (rstudio/rstudioapi@19c80c0)
Rttf2pt1 1.3.12 2023-01-22 [1] CRAN (R 4.3.0)
sass 0.4.7 2023-07-15 [1] CRAN (R 4.3.1)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.0)
stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0)
stringr * 1.5.0.9000 2023-08-11 [1] Github (tidyverse/stringr@08ff36f)
styler * 1.10.2 2023-08-30 [1] Github (r-lib/styler@1976817)
tibble * 3.2.1.9005 2023-05-28 [1] Github (tidyverse/tibble@4de5c15)
tidyr * 1.3.0.9000 2023-04-23 [1] Github (tidyverse/tidyr@0764e65)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.3.0)
tidyverse * 2.0.0.9000 2023-04-23 [1] Github (tidyverse/tidyverse@8ec2e1f)
timechange 0.2.0 2023-01-11 [1] CRAN (R 4.3.0)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.0)
usethis 2.2.2.9000 2023-07-11 [1] Github (r-lib/usethis@467ff57)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.0)
vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.3.0)
vroom 1.6.3.9000 2023-04-30 [1] Github (tidyverse/vroom@89b6aac)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.3.0)
xfun 0.40 2023-08-09 [1] CRAN (R 4.3.1)
xml2 1.3.5 2023-07-06 [1] CRAN (R 4.3.1)
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0)
[1] /opt/homebrew/lib/R/4.3/site-library
[2] /opt/homebrew/Cellar/r/4.3.1/lib/R/library
─ Python configuration ───────────────────────────────────────────────────────
python: /Users/jialei/.pyenv/shims/python
libpython: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/libpython3.10.dylib
pythonhome: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3:/Users/jialei/.pyenv/versions/mambaforge-22.9.0-3
version: 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:26:08) [Clang 14.0.6 ]
numpy: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
numpy_version: 1.24.3
anndata: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/anndata
NOTE: Python version was forced by RETICULATE_PYTHON
──────────────────────────────────────────────────────────────────────────────
Styling 1 files:
summarize_datasets.qmd ✔
────────────────────────────────────────
Status Count Legend
✔ 1 File unchanged.
ℹ 0 File changed.
✖ 0 Styling threw an error.
────────────────────────────────────────