Quality-controlled PBMC + HEK293T data (UMI counts and matchSCore2 annotations) from 13 protocols. For more information, see https://github.com/elimereu/matchSCore2 and http://dx.doi.org/10.1101/630087.
suppressPackageStartupMessages({
library(scran)
library(magrittr)
library(dplyr)
})
# Script configuration ----
# Seed value; nothing in this section of the script reads it.
seed <- 1000
# Project-relative folders, resolved through here::here().
out_path <- here::here("out")
data_path <- here::here("data")

# Load the input data ----
# load() restores objects under the names they were saved with and returns
# those names, so verify that the `sce` object used below was actually restored
# instead of failing later with an unrelated "object not found" error.
sce_file <- file.path(data_path, "sce.all_classified.technologies.RData")
loaded_objects <- load(file = sce_file)
if (!"sce" %in% loaded_objects) {
  stop("Expected an object named 'sce' in ", sce_file, call. = FALSE)
}
# Inspect the loaded object ----
# The lines starting with `##` are recorded console output of the call above
# them.

# First rows of the column metadata: three columns (`nnet2`, `ident`, `batch`).
head(colData(sce))
## DataFrame with 6 rows and 3 columns
## nnet2 ident batch
## <factor> <factor> <character>
## 10X2x5K_64221_AAACCTGCACTTCGAA B cells B cells Chromium
## 10X2x5K_64221_AAACCTGCAGTACACT CD4 T cells CD4 T cells 2 Chromium
## 10X2x5K_64221_AAACCTGTCCACTGGG CD4 T cells CD4 T cells 1 Chromium
## 10X2x5K_64221_AAACGGGAGAGCTTCT HEK cells HEK cells 2 Chromium
## 10X2x5K_64221_AAACGGGAGGTGGGTT HEK cells HEK cells 2 Chromium
## 10X2x5K_64221_AAACGGGCACACGCTG CD4 T cells CD4 T cells 1 Chromium
# Dimensions of the object (rows, columns) -- presumably features x cells;
# confirm against the class of `sce`.
dim(sce)
## [1] 23381 20237
# Number of columns of `sce` per `batch` label (13 distinct labels).
table(sce$batch)
##
## C1HT-medium C1HT-small CEL-Seq2 Chromium Chromium(sn) ddSEQ
## 2216 1606 1083 1604 1515 2109
## Drop-Seq ICELL8 inDrop MARS-Seq mcSCRB-Seq Quartz-Seq2
## 2261 1927 686 1481 1684 1333
## Smart-Seq2
## 732
# Drop sparsely populated `nnet2` classes ----
# Classes with fewer than this many columns (cells) in `sce` are removed.
min_cells_per_type <- 50

# One row per `nnet2` label with its count `n`, restricted to the labels that
# fall below the threshold.
discard_ct <- as_tibble(colData(sce)) %>%
  group_by(nnet2) %>%
  tally() %>%
  filter(n < min_cells_per_type)

# Keep only the columns whose label is not in the discard list, then remove the
# factor levels that are left without any observation.
sce <- sce[, !sce$nnet2 %in% as.character(discard_ct$nnet2)]
sce$nnet2 <- droplevels(sce$nnet2)

# Save the filtered object ----
# saveRDS() does not create missing directories, so make sure the output folder
# exists before writing (no-op when it is already there).
dir.create(out_path, showWarnings = FALSE, recursive = TRUE)
saveRDS(sce, file = file.path(out_path, "sce_hca.rds"))