A human pancreatic islet cell datasets produced across four technologies, CelSeq (GSE81076) CelSeq2 (GSE85241), Fluidigm C1 (GSE86469), and SMART-Seq2 (E-MTAB-5061). Count data are provided by https://satijalab.org, where they are used to demonstrate seurat integration. Further details can be assessed via https://satijalab.org/seurat/v3.0/integration.html.
suppressPackageStartupMessages({
library(CellBench)
library(cowplot)
library(ggplot2)
library(scater)
library(jcolors)
library(CellMixS)
library(gridExtra)
library(purrr)
library(jcolors)
library(here)
library(tidyr)
library(dplyr)
library(stringr)
library(variancePartition)
library(stringr)
library(Seurat)
})
seed <- 1000
Load data
out_path <- here::here("out")
data_path <- here::here("data")
pancreas.data <- readRDS(paste0(data_path,"/pancreas_expression_matrix.rds"))
# load metadata
metadata <- readRDS(paste0(data_path,"/pancreas_metadata.rds"))
# create SeuratObject
pancreas <- CreateSeuratObject(pancreas.data, meta.data = metadata)
pancreas.list <- SplitObject(pancreas, split.by = "tech")
for (i in 1:length(pancreas.list)) {
pancreas.list[[i]] <- NormalizeData(pancreas.list[[i]], verbose = FALSE)
pancreas.list[[i]] <- FindVariableFeatures(pancreas.list[[i]],
selection.method = "vst",
nfeatures = 2000,
verbose = FALSE)
}
reference.list <- pancreas.list[c("celseq", "celseq2", "smartseq2")]
# find anchors & integrate
pancreas.anchors <- FindIntegrationAnchors(object.list = reference.list, dims = 1:30)
## Computing 2000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3510 anchors
## Filtering anchors
## Retained 2759 anchors
## Extracting within-dataset neighbors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 3501 anchors
## Filtering anchors
## Retained 2729 anchors
## Extracting within-dataset neighbors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
## Found 6176 anchors
## Filtering anchors
## Retained 4561 anchors
## Extracting within-dataset neighbors
pancreas.integrated <- IntegrateData(anchorset = pancreas.anchors, dims = 1:30)
## Merging dataset 1 into 2
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Merging dataset 3 into 2 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
# scale integrated data
DefaultAssay(pancreas.integrated) <- "integrated"
pancreas.integrated <- ScaleData(pancreas.integrated, verbose = FALSE)
pancreas.integrated <- RunPCA(pancreas.integrated, npcs = 30, verbose = FALSE)
pancreas.integrated <- RunTSNE(pancreas.integrated, reduction = "pca", dims = 1:30)
pancreas.integrated <- RunUMAP(pancreas.integrated, reduction = "pca", dims = seq_len(30),
seed.use = seed, verbose = FALSE,n.neighbors = 30, min.dist = 0.5)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
p1 <- DimPlot(pancreas.integrated, reduction = "tsne", group.by = "tech")
p2 <- DimPlot(pancreas.integrated, reduction = "tsne", group.by = "celltype",
label = TRUE, repel = TRUE) + NoLegend()
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.
plot_grid(p1, p2)
seurat <- pancreas.integrated
sce <- SingleCellExperiment(
assays=list(
counts=seurat@assays$RNA@counts,
logcounts=seurat@assays$RNA@data),
colData=seurat@meta.data,
reducedDims=lapply(seurat@reductions, FUN=function(x) x@cell.embeddings)
)
# Save data
saveRDS(sce, file = paste0(out_path, "/sce_pancreas.rds"))
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
##
## Matrix products: default
## BLAS: /home/aluetg/R/lib/R/lib/libRblas.so
## LAPACK: /home/aluetg/R/lib/R/lib/libRlapack.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] Seurat_3.1.4 variancePartition_1.16.1
## [3] scales_1.1.0 foreach_1.4.8
## [5] limma_3.42.2 stringr_1.4.0
## [7] dplyr_0.8.4 tidyr_1.0.2
## [9] here_0.1 purrr_0.3.3
## [11] gridExtra_2.3 CellMixS_1.2.3
## [13] kSamples_1.2-9 SuppDists_1.1-9.5
## [15] jcolors_0.0.4 scater_1.14.6
## [17] ggplot2_3.2.1 cowplot_1.0.0
## [19] CellBench_1.2.0 tibble_2.1.3
## [21] magrittr_1.5 SingleCellExperiment_1.8.0
## [23] SummarizedExperiment_1.16.1 DelayedArray_0.12.2
## [25] BiocParallel_1.20.1 matrixStats_0.55.0
## [27] Biobase_2.46.0 GenomicRanges_1.38.0
## [29] GenomeInfoDb_1.22.0 IRanges_2.20.2
## [31] S4Vectors_0.24.3 BiocGenerics_0.32.0
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.5 BiocFileCache_1.10.2 sn_1.5-5
## [4] plyr_1.8.5 igraph_1.2.4.2 lazyeval_0.2.2
## [7] splines_3.6.1 listenv_0.8.0 TH.data_1.0-10
## [10] digest_0.6.25 htmltools_0.4.0 viridis_0.5.1
## [13] gdata_2.18.0 memoise_1.1.0 cluster_2.1.0
## [16] doParallel_1.0.15 ROCR_1.0-7 globals_0.12.5
## [19] RcppParallel_4.4.4 sandwich_2.5-1 prettyunits_1.1.1
## [22] colorspace_1.4-1 blob_1.2.1 rappdirs_0.3.1
## [25] ggrepel_0.8.1 xfun_0.12 crayon_1.3.4
## [28] RCurl_1.98-1.1 jsonlite_1.6.1 lme4_1.1-21
## [31] survival_3.1-8 zoo_1.8-7 iterators_1.0.12
## [34] ape_5.3 glue_1.3.1 gtable_0.3.0
## [37] zlibbioc_1.32.0 XVector_0.26.0 listarrays_0.3.0
## [40] leiden_0.3.3 BiocSingular_1.2.2 future.apply_1.4.0
## [43] mvtnorm_1.1-0 DBI_1.1.0 bibtex_0.4.2.2
## [46] Rcpp_1.0.3 plotrix_3.7-7 metap_1.3
## [49] viridisLite_0.3.0 progress_1.2.2 reticulate_1.14
## [52] bit_1.1-15.2 rsvd_1.0.3 tsne_0.1-3
## [55] htmlwidgets_1.5.1 httr_1.4.1 gplots_3.0.3
## [58] RColorBrewer_1.1-2 TFisher_0.2.0 ica_1.0-2
## [61] farver_2.0.3 pkgconfig_2.0.3 uwot_0.1.5
## [64] dbplyr_1.4.2 labeling_0.3 tidyselect_1.0.0
## [67] rlang_0.4.5 reshape2_1.4.3 munsell_0.5.0
## [70] tools_3.6.1 RSQLite_2.2.0 ggridges_0.5.2
## [73] evaluate_0.14 yaml_2.2.1 npsurv_0.4-0
## [76] knitr_1.28 bit64_0.9-7 fitdistrplus_1.0-14
## [79] caTools_1.18.0 RANN_2.6.1 pbapply_1.4-2
## [82] future_1.16.0 nlme_3.1-144 compiler_3.6.1
## [85] pbkrtest_0.4-8.6 png_0.1-7 plotly_4.9.2
## [88] beeswarm_0.2.3 curl_4.3 lsei_1.2-0
## [91] stringi_1.4.6 lattice_0.20-40 Matrix_1.2-18
## [94] nloptr_1.2.2 multtest_2.42.0 vctrs_0.2.3
## [97] mutoss_0.1-12 pillar_1.4.3 lifecycle_0.1.0
## [100] Rdpack_0.11-1 lmtest_0.9-37 RcppAnnoy_0.0.15
## [103] BiocNeighbors_1.4.2 data.table_1.12.8 bitops_1.0-6
## [106] irlba_2.3.3 gbRd_0.4-11 patchwork_1.0.0
## [109] colorRamps_2.3 R6_2.4.1 KernSmooth_2.23-16
## [112] vipor_0.4.5 codetools_0.2-16 boot_1.3-24
## [115] MASS_7.3-51.5 gtools_3.8.1 assertthat_0.2.1
## [118] rprojroot_1.3-2 withr_2.1.2 sctransform_0.2.1
## [121] mnormt_1.5-6 multcomp_1.4-12 GenomeInfoDbData_1.2.2
## [124] hms_0.5.3 grid_3.6.1 minqa_1.2.4
## [127] rmarkdown_2.1 DelayedMatrixStats_1.8.0 Rtsne_0.15
## [130] numDeriv_2016.8-1.1 lubridate_1.7.4 ggbeeswarm_0.6.0