HCA protocols

Quality-controlled (QCed) PBMC + HEK293T data (UMI counts + matchSCore2 annotations) from 13 protocols. For more information, see https://github.com/elimereu/matchSCore2 and http://dx.doi.org/10.1101/630087.

suppressPackageStartupMessages({
    library(scran)
    library(magrittr)
    library(dplyr)
})

# Seed value kept for stochastic steps; it is not referenced in this part of
# the script.
seed <- 1000

# Input and output directories, resolved through here::here().
out_path <- here::here("out")
data_path <- here::here("data")

# Restore the classified multi-protocol dataset. load() recreates objects under
# the names they were saved with and invisibly returns those names, so check
# that the `sce` object used by the rest of the script was really restored
# rather than failing later with an unrelated "object not found" error (or
# silently reusing a stale `sce` already present in the workspace).
sce_file <- file.path(data_path, "sce.all_classified.technologies.RData")
loaded_objects <- load(file = sce_file)
stopifnot("sce" %in% loaded_objects)


# Show the first rows of the per-column annotation table of `sce`.
colData(sce) %>% head()
## DataFrame with 6 rows and 3 columns
##                                      nnet2         ident       batch
##                                   <factor>      <factor> <character>
## 10X2x5K_64221_AAACCTGCACTTCGAA     B cells       B cells    Chromium
## 10X2x5K_64221_AAACCTGCAGTACACT CD4 T cells CD4 T cells 2    Chromium
## 10X2x5K_64221_AAACCTGTCCACTGGG CD4 T cells CD4 T cells 1    Chromium
## 10X2x5K_64221_AAACGGGAGAGCTTCT   HEK cells   HEK cells 2    Chromium
## 10X2x5K_64221_AAACGGGAGGTGGGTT   HEK cells   HEK cells 2    Chromium
## 10X2x5K_64221_AAACGGGCACACGCTG CD4 T cells CD4 T cells 1    Chromium
# Size of the object: number of rows followed by number of columns.
c(nrow(sce), ncol(sce))
## [1] 23381 20237
# Tally the columns of `sce` by their `batch` label.
table(colData(sce)$batch)
## 
##  C1HT-medium   C1HT-small     CEL-Seq2     Chromium Chromium(sn)        ddSEQ 
##         2216         1606         1083         1604         1515         2109 
##     Drop-Seq       ICELL8       inDrop     MARS-Seq   mcSCRB-Seq  Quartz-Seq2 
##         2261         1927          686         1481         1684         1333 
##   Smart-Seq2 
##          732
# Labels in `nnet2` that are carried by fewer than 50 columns of `sce`.
discard_ct <- colData(sce) %>%
  as_tibble() %>%
  count(nnet2) %>%
  filter(n < 50)

# Drop every column whose `nnet2` label is in the discard list, then remove
# the factor levels that no longer occur.
rare_labels <- as.character(discard_ct$nnet2)
keep_cols <- !(sce$nnet2 %in% rare_labels)
sce <- sce[, keep_cols]
sce$nnet2 <- droplevels(sce$nnet2)

# Persist the filtered object. The output directory is created first when it
# is missing, because saveRDS() cannot open a file inside a directory that
# does not exist and would otherwise fail at the very end of the script.
dir.create(out_path, showWarnings = FALSE, recursive = TRUE)
saveRDS(sce, file = file.path(out_path, "sce_hca.rds"))