Sys.time()
[1] "2023-09-10 03:28:31 CDT"
Sys.time()
[1] "2023-09-10 03:28:31 CDT"
[1] "America/Chicago"
PROJECT_DIR <- "/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation"
score_snps <- function(m, cells, snp_concensus) {
furrr::future_map(c(cells), \(x) {
selected_cell <- x
snps_for_testing <- rownames(
m
)[m[, selected_cell] != 0]
snps_for_testing <- snps_for_testing[
snps_for_testing %in% names(snp_concensus)
]
# cat(length(snps_for_testing), "\n")
data.frame(
cell = selected_cell,
score = mean(
m[
snps_for_testing,
selected_cell
] == snp_concensus[snps_for_testing]
),
num_snps = length(snps_for_testing)
)
}) |>
dplyr::bind_rows() |>
dplyr::mutate(batch = stringr::str_remove(cell, "_[A-Z]{16}$"))
}
Load required packages.
library(tidyverse)
## ── Attaching core tidyverse packages ─────────────────── tidyverse 2.0.0.9000 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4.9000
## ✔ forcats 1.0.0.9000 ✔ stringr 1.5.0.9000
## ✔ ggplot2 3.4.3.9000 ✔ tibble 3.2.1.9005
## ✔ lubridate 1.9.2.9000 ✔ tidyr 1.3.0.9000
## ✔ purrr 1.0.2.9000
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Matrix)
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(patchwork)
library(extrafont)
## Registering fonts with R
library(furrr)
## Loading required package: future
get_mode <- function(v, exclude_zero = FALSE) {
if (exclude_zero) {
v <- v[v != 0]
}
uniqv <- unique(v)
uniqv[which.max(tabulate(match(v, uniqv)))]
}
normalize <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
add_panel_border <- function() {
ggplot2::theme(
plot.background = element_rect(
color = "grey70", fill = NA, linewidth = 0.25
)
)
}
`%+replace%` <- ggplot2::`%+replace%`
`%<-%` <- zeallot::`%<-%`
reticulate::py_config()
python: /Users/jialei/.pyenv/shims/python
libpython: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/libpython3.10.dylib
pythonhome: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3:/Users/jialei/.pyenv/versions/mambaforge-22.9.0-3
version: 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:26:08) [Clang 14.0.6 ]
numpy: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
numpy_version: 1.24.3
anndata: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/anndata
NOTE: Python version was forced by RETICULATE_PYTHON
VARTRIX_VCF <- "vartrix_LW203"
VARTRIX_MATRIX_DIR <- "no-duplicates_umi_mapq10_consensus"
matrix_snp <- purrr::map(c(
"LW186", "LW187", "LW188",
"LW189", "LW202", "LW203", "LW204"
), \(x) {
read_snp_matrix(
matrix_dir = file.path(
PROJECT_DIR, "genotyping", x,
VARTRIX_VCF, VARTRIX_MATRIX_DIR
)
)
}) |>
purrr::reduce(cbind)
matrix_snp |> dim()
[1] 48238 39755
cell_metadata_snp <- Matrix::colSums(matrix_snp) |>
tibble::enframe(name = "cell", value = "num_umis") |>
dplyr::mutate(
batch = stringr::str_remove(
string = cell,
pattern = "_[A-Z]{16}"
)
)
cell_metadata_snp |> head()
# A tibble: 6 × 3
cell num_umis batch
<chr> <dbl> <chr>
1 LW186_AAACCCAAGCCTCACG 348 LW186
2 LW186_AAACCCAAGTTGTAGA 396 LW186
3 LW186_AAACCCACAAATCGGG 271 LW186
4 LW186_AAACCCACAGCCTACG 96 LW186
5 LW186_AAACCCACAGGAAGTC 77 LW186
6 LW186_AAACCCACATTCAGGT 198 LW186
adata_files <- purrr::map(c("LW119", "LW120", "LW121", "LW122"), \(x) {
file.path(
PROJECT_DIR,
"raw",
x,
"matrix",
"adata.h5ad"
)
})
purrr::map_lgl(adata_files, file.exists)
[1] TRUE TRUE TRUE TRUE
BACKED <- NULL
matrix_readcount_use <- purrr::map(adata_files, \(x) {
cat(x, "\n")
ad$read_h5ad(
filename = x, backed = BACKED
) |>
extract_matrix_from_adata()
}) |>
purrr::reduce(cbind)
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/LW119/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/LW120/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/LW121/matrix/adata.h5ad
/Users/jialei/Dropbox/Data/Projects/UTSW/Peri-implantation/raw/LW122/matrix/adata.h5ad
matrix_readcount_use <- matrix_readcount_use[
, sort(colnames(matrix_readcount_use))
]
matrix_readcount_use |> dim()
[1] 33538 8831
cells_LW203 <- cell_metadata_snp |>
dplyr::filter(batch == "LW203") |>
dplyr::pull(cell)
cells_LW203 |> length()
[1] 2571
snp_group <- split(
rownames(matrix_snp),
ceiling(seq_along(rownames(matrix_snp)) / 1000)
)
# get concensus snp table
snp_concensus_LW203 <- purrr::map(seq_len(length(snp_group)), \(x) {
apply(
matrix_snp[snp_group[[x]], cells_LW203],
MARGIN = 1, get_mode, TRUE
)
}) |>
unlist()
snp_concensus_LW203 <- snp_concensus_LW203[!is.na(snp_concensus_LW203)]
snp_scores_LW203 <- score_snps(
m = matrix_snp,
cells = cells_LW203,
snp_concensus = snp_concensus_LW203
)
snp_scores_LW187_LW189 <- score_snps(
m = matrix_snp,
cells = cells_LW187_LW189,
snp_concensus = snp_concensus_LW203
)
snp_scores_LW202_LW204 <- score_snps(
m = matrix_snp,
cells = cells_LW202_LW204,
snp_concensus = snp_concensus_LW203
)
snp_scores_LW186_LW188 <- score_snps(
m = matrix_snp,
cells = cells_LW186_LW188,
snp_concensus = snp_concensus_LW203
)
file_name <- "snp_scores_LW186_LW188.csv"
# file_name <- file.path(OUTPUT_DIR, file_name)
cat(file_name, "\n")
snp_scores_LW186_LW188.csv
if (!file.exists(file_name)) {
vroom::vroom_write(
x = snp_scores_LW186_LW188,
file = file_name,
delim = ","
)
}
MINIMAL_SNPS <- 20
BINWIDTH <- 0.0005
cutoff <- system(
glue::glue(
file.path(
PROJECT_DIR, "genotyping", "call_gmm.py"
),
" ",
file.path(
# OUTPUT_DIR,
"snp_scores_LW186_LW188.csv"
)
),
intern = TRUE
) |>
stringr::str_remove(
pattern = "\\["
) |>
stringr::str_remove(
pattern = "\\]"
) |>
stringr::str_split(
pattern = " "
) |>
unlist() |>
as.numeric() |>
rev()
snp_score_cutoff <- cutoff[4]
snp_score_cutoff
[1] 0.8289474
color_palette_assay <- c(
"Blastocyst (fibronectin)" = "#F28E2B",
"Blastocyst (stromal cells)" = "#4E79A7",
"Blastoid (fibronectin)" = "#E15759",
"Blastoid (stromal cells)" = "#76B7B2",
"Stromal cells" = "#59A14F"
)
ann_text <- data.frame(
num_cells = snp_scores_LW203 |>
dplyr::filter(
num_snps >= MINIMAL_SNPS,
batch == "LW203",
score <= snp_score_cutoff
) |>
nrow(),
batch = "LW203"
)
p_histogram_snp_scores <- list(
snp_scores_LW203,
snp_scores_LW187_LW189,
snp_scores_LW186_LW188,
snp_scores_LW202_LW204
) |>
dplyr::bind_rows() |>
dplyr::mutate(
batch_annotated = dplyr::case_when(
batch %in% c("LW187", "LW189") ~ "Blastocyst (fibronectin)",
batch %in% c("LW186", "LW188") ~ "Blastocyst (IESCs)",
batch %in% c("LW202") ~ "Blastoid (fibronectin)",
batch %in% c("LW203") ~ "IESCs",
batch %in% c("LW204") ~ "Blastoid (IESCs)"
),
label = dplyr::case_when(
batch %in% c("LW187", "LW189") ~ paste(
batch_annotated, "LW187, LW189",
sep = "\n"
),
batch %in% c("LW186", "LW188") ~ paste(
batch_annotated, "LW186, LW188",
sep = "\n"
),
batch %in% c("LW202") ~ paste(
batch_annotated, "LW202",
sep = "\n"
),
batch %in% c("LW203") ~ paste(
batch_annotated, "LW203 ",
sep = "\n"
),
batch %in% c("LW204") ~ paste(
batch_annotated, "LW204",
sep = "\n"
),
)
) |>
dplyr::filter(
num_snps >= MINIMAL_SNPS,
) |>
dplyr::mutate(
color = dplyr::case_when(
score >= snp_score_cutoff ~ "#F8B195",
TRUE ~ "grey35"
)
) |>
{
\(x) {
x |>
ggplot2::ggplot(
ggplot2::aes(
x = score, fill = color
)
) +
ggplot2::geom_histogram(
binwidth = 0.02,
boundary = 0,
closed = "left"
) +
ggplot2::geom_vline(
xintercept = snp_score_cutoff,
color = "salmon",
linewidth = 0.25
) +
ggplot2::scale_x_continuous(
name = "SNP score"
) +
ggplot2::scale_y_continuous(
name = "Number of cells"
) +
ggplot2::scale_fill_identity() +
ggplot2::facet_wrap(
ggplot2::vars(label),
scales = "free_y",
nrow = 1
) +
ggplot2::theme_grey(
base_size = 6, base_family = "Arial"
) +
ggplot2::theme(
legend.background = ggplot2::element_blank(),
legend.key = ggplot2::element_blank(),
#
# panel.background = ggplot2::element_blank(),
#
plot.background = ggplot2::element_blank(),
strip.text = ggplot2::element_text(
family = "Arial", size = 6
)
)
}
}()
g <- ggplot2::ggplot_gtable(
ggplot2::ggplot_build(p_histogram_snp_scores)
)
stript <- which(grepl("strip-t", g$layout$name))
k <- 1
for (i in stript) {
j <- which(grepl("rect", g$grobs[[i]]$grobs[[1]]$childrenOrder))
g$grobs[[i]]$grobs[[1]]$children[[j]]$gp$fill <- setNames(
object = color_palette_assay,
nm = seq_len(length(color_palette_assay))
)[k]
k <- k + 1
}
grid::grid.draw(g)
devtools::session_info()
─ Session info ───────────────────────────────────────────────────────────────
setting value
version R version 4.3.1 (2023-06-16)
os macOS Ventura 13.5.2
system aarch64, darwin22.4.0
ui unknown
language (EN)
collate en_US.UTF-8
ctype en_US.UTF-8
tz America/Chicago
date 2023-09-10
pandoc 2.19.2 @ /Users/jialei/.pyenv/shims/ (via rmarkdown)
─ Packages ───────────────────────────────────────────────────────────────────
package * version date (UTC) lib source
cachem 1.0.8 2023-05-01 [1] CRAN (R 4.3.0)
callr 3.7.3 2022-11-02 [1] CRAN (R 4.3.0)
cli 3.6.1 2023-03-23 [1] CRAN (R 4.3.0)
codetools 0.2-19 2023-02-01 [2] CRAN (R 4.3.1)
colorspace 2.1-0 2023-01-23 [1] CRAN (R 4.3.0)
crayon 1.5.2 2022-09-29 [1] CRAN (R 4.3.0)
devtools 2.4.5.9000 2023-08-11 [1] Github (r-lib/devtools@163c3f2)
digest 0.6.33 2023-07-07 [1] CRAN (R 4.3.1)
dplyr * 1.1.3 2023-09-03 [1] CRAN (R 4.3.1)
ellipsis 0.3.2 2021-04-29 [1] CRAN (R 4.3.0)
evaluate 0.21 2023-05-05 [1] CRAN (R 4.3.0)
extrafont * 0.19 2023-01-18 [1] CRAN (R 4.3.0)
extrafontdb 1.0 2012-06-11 [1] CRAN (R 4.3.0)
fansi 1.0.4 2023-01-22 [1] CRAN (R 4.3.0)
farver 2.1.1 2022-07-06 [1] CRAN (R 4.3.0)
fastmap 1.1.1 2023-02-24 [1] CRAN (R 4.3.0)
forcats * 1.0.0.9000 2023-04-23 [1] Github (tidyverse/forcats@4a8525a)
fs 1.6.3 2023-07-20 [1] CRAN (R 4.3.1)
furrr * 0.3.1 2022-08-15 [1] CRAN (R 4.3.1)
future * 1.33.0 2023-07-01 [1] CRAN (R 4.3.1)
generics 0.1.3 2022-07-05 [1] CRAN (R 4.3.0)
ggplot2 * 3.4.3.9000 2023-09-05 [1] Github (tidyverse/ggplot2@d180248)
globals 0.16.2 2022-11-21 [1] CRAN (R 4.3.0)
glue 1.6.2.9000 2023-04-23 [1] Github (tidyverse/glue@cbac82a)
gtable 0.3.4.9000 2023-08-22 [1] Github (r-lib/gtable@c410a54)
hms 1.1.3 2023-03-21 [1] CRAN (R 4.3.0)
htmltools 0.5.6 2023-08-10 [1] CRAN (R 4.3.1)
htmlwidgets 1.6.2 2023-03-17 [1] CRAN (R 4.3.0)
jsonlite 1.8.7 2023-06-29 [1] CRAN (R 4.3.1)
knitr 1.43 2023-05-25 [1] CRAN (R 4.3.0)
labeling 0.4.3 2023-08-29 [1] CRAN (R 4.3.1)
lattice 0.21-8 2023-04-05 [2] CRAN (R 4.3.1)
lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.3.0)
listenv 0.9.0 2022-12-16 [1] CRAN (R 4.3.0)
lubridate * 1.9.2.9000 2023-07-22 [1] Github (tidyverse/lubridate@cae67ea)
magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.3.0)
Matrix * 1.6-1 2023-08-14 [2] CRAN (R 4.3.1)
memoise 2.0.1 2021-11-26 [1] CRAN (R 4.3.0)
munsell 0.5.0 2018-06-12 [1] CRAN (R 4.3.0)
parallelly 1.36.0 2023-05-26 [1] CRAN (R 4.3.0)
patchwork * 1.1.3.9000 2023-08-17 [1] Github (thomasp85/patchwork@51a6eff)
pillar 1.9.0 2023-03-22 [1] CRAN (R 4.3.0)
pkgbuild 1.4.2 2023-06-26 [1] CRAN (R 4.3.1)
pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.3.0)
pkgload 1.3.2.9000 2023-07-05 [1] Github (r-lib/pkgload@3cf9896)
png 0.1-8 2022-11-29 [1] CRAN (R 4.3.0)
prettyunits 1.1.1.9000 2023-04-23 [1] Github (r-lib/prettyunits@8706d89)
processx 3.8.2 2023-06-30 [1] CRAN (R 4.3.1)
ps 1.7.5 2023-04-18 [1] CRAN (R 4.3.0)
purrr * 1.0.2.9000 2023-08-11 [1] Github (tidyverse/purrr@ac4f5a9)
R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.3.0)
R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.3.0)
R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.3.0)
R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.3.0)
R6 2.5.1.9000 2023-04-23 [1] Github (r-lib/R6@e97cca7)
Rcpp 1.0.11 2023-07-06 [1] CRAN (R 4.3.1)
readr * 2.1.4.9000 2023-08-03 [1] Github (tidyverse/readr@80e4dc1)
remotes 2.4.2.9000 2023-06-09 [1] Github (r-lib/remotes@8875171)
reticulate 1.31 2023-08-10 [1] CRAN (R 4.3.1)
rlang 1.1.1.9000 2023-06-09 [1] Github (r-lib/rlang@c55f602)
rmarkdown 2.24.2 2023-09-07 [1] Github (rstudio/rmarkdown@8d2d9b8)
rstudioapi 0.15.0.9000 2023-09-07 [1] Github (rstudio/rstudioapi@19c80c0)
Rttf2pt1 1.3.12 2023-01-22 [1] CRAN (R 4.3.0)
scales 1.2.1 2022-08-20 [1] CRAN (R 4.3.0)
sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.3.0)
stringi 1.7.12 2023-01-11 [1] CRAN (R 4.3.0)
stringr * 1.5.0.9000 2023-08-11 [1] Github (tidyverse/stringr@08ff36f)
styler * 1.10.2 2023-08-30 [1] Github (r-lib/styler@1976817)
tibble * 3.2.1.9005 2023-05-28 [1] Github (tidyverse/tibble@4de5c15)
tidyr * 1.3.0.9000 2023-04-23 [1] Github (tidyverse/tidyr@0764e65)
tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.3.0)
tidyverse * 2.0.0.9000 2023-04-23 [1] Github (tidyverse/tidyverse@8ec2e1f)
timechange 0.2.0 2023-01-11 [1] CRAN (R 4.3.0)
tzdb 0.4.0 2023-05-12 [1] CRAN (R 4.3.0)
usethis 2.2.2.9000 2023-07-11 [1] Github (r-lib/usethis@467ff57)
utf8 1.2.3 2023-01-31 [1] CRAN (R 4.3.0)
vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.3.0)
withr 2.5.0 2022-03-03 [1] CRAN (R 4.3.0)
xfun 0.40 2023-08-09 [1] CRAN (R 4.3.1)
yaml 2.3.7 2023-01-23 [1] CRAN (R 4.3.0)
zeallot 0.1.0.9000 2023-06-11 [1] Github (nteetor/zeallot@55167bf)
[1] /opt/homebrew/lib/R/4.3/site-library
[2] /opt/homebrew/Cellar/r/4.3.1/lib/R/library
─ Python configuration ───────────────────────────────────────────────────────
python: /Users/jialei/.pyenv/shims/python
libpython: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/libpython3.10.dylib
pythonhome: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3:/Users/jialei/.pyenv/versions/mambaforge-22.9.0-3
version: 3.10.9 | packaged by conda-forge | (main, Feb 2 2023, 20:26:08) [Clang 14.0.6 ]
numpy: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/numpy
numpy_version: 1.24.3
anndata: /Users/jialei/.pyenv/versions/mambaforge-22.9.0-3/lib/python3.10/site-packages/anndata
NOTE: Python version was forced by RETICULATE_PYTHON
──────────────────────────────────────────────────────────────────────────────