suppressPackageStartupMessages({
  library(purrr)
  library(tidyr)
  library(dplyr)
  library(jcolors)
  library(hrbrthemes)
  library(ggplot2)
  library(magrittr)
})

# Use the cairo backend for bitmap graphics devices.
options(bitmapType = "cairo")

# ggplot2 "waiver" sentinel kept under a short name; it is not referenced in
# the visible part of this script.
default_guide <- ggplot2::waiver()

## Dataset and metrics

# Inputs taken from the document parameters (`params`).
data <- params$data
data_path <- params$data_path
size_tab <- readRDS(params$size_tab)
out_path <- params$out_path

# Benchmark reports: only files whose name really ends in ".txt".
# (The dot is escaped and the pattern anchored; a bare ".txt" would also match
# e.g. "context_notes.rds" because "." matches any character.)
time_res <- list.files(path = data_path, pattern = "\\.txt$")

# Metric name = everything after the last "<digit>_" with the extension removed.
metrics <- time_res %>%
    gsub('.*[0-9]_', '', .) %>%
    sub('\\.txt$', '', .) %>%
    unique()

# Size factor = what is left of the file name once lower-case letters, one
# leading digit, all underscores and a trailing dot have been stripped.
size_fac <- time_res %>% gsub('[a-z]*', '', .) %>% gsub('^[0-9]', '', .) %>% 
    gsub('_*', '', .) %>% gsub('\\.$', '', .) %>% unique()

# Every combination "<data>_<size factor>".
size_dat <- as.vector(outer(data, size_fac, paste, sep="_"))

# Read one benchmark report and return a one-row data.frame.
#
# `file` is a file name relative to the global `data_path` and is expected to
# look like "<name>_<size>_<metric>.txt". The report is read as a tab-separated
# table; rows 2 and 3 of its second column are summed into `CPU_time` and
# row 10 becomes `RSS` (in each case the text after the last ": ").
# NOTE(review): the row positions look like GNU `time -v` output (user time,
# system time, maximum resident set size) - confirm against the producer.
# `cells` and `genes` are the 2nd and 1st element of
# `size_tab[[<name>]][[<size>]]` (global `size_tab`).
read_time <- function(file){
    report <- read.delim2(paste0(data_path, file), header = FALSE, sep = "\t", dec = ",")
    fields <- as.character(report[[2]])
    if (length(fields) < 10L) {
        stop("Report '", file, "' has fewer than 10 lines", call. = FALSE)
    }

    # Numeric value following the last ": " on report line `i`.
    field_value <- function(i) as.numeric(gsub('.*: ', '', fields[[i]]))
    time <- field_value(2) + field_value(3)
    rss <- field_value(10)

    # Size factor: strip lower-case letters, one leading digit, all
    # underscores and the trailing dot from the file name.
    size_f <- gsub('[a-z]*', '', file)
    size_f <- gsub('^[0-9]', '', size_f)
    size_f <- gsub('_*', '', size_f)
    size_f <- gsub('\\.$', '', size_f)

    name <- gsub('_[0-9].*', '', file)

    # Escaped and anchored: a bare '.txt' would also delete e.g. "mtxt"
    # inside a metric name.
    metric <- sub('\\.txt$', '', gsub('.*[0-9]_', '', file))

    cells <- size_tab[[name]][[size_f]]
    if (length(cells) < 2L) {
        stop("No size entry for '", name, "' / '", size_f, "' in size_tab",
             call. = FALSE)
    }

    # Returned visibly (the original ended on an assignment).
    data.frame("CPU_time" = time,
               "RSS" = rss,
               "cells" = cells[[2]],
               "genes" = cells[[1]],
               "metric" = metric)
}

# One row per report file, stacked into a single table.
res_tab <- dplyr::bind_rows(lapply(time_res, read_time))

# Colour pools assembled from the jcolors palettes.
pal_6_8 <- c(jcolors('pal6'), jcolors('pal8'))
pal_6_8_7 <- c(pal_6_8, jcolors('pal7'))

# Unnamed colour vector for datasets.
cols_data <- unname(
  c(pal_6_8[c(1,8,14,5,2:4,6,7,9:13,15:20)], jcolors('pal4'))
)

# Colour vector for metrics.
cols <- c(
  pal_6_8_7[c(1,12,18,25,27,2,4,7,3,6,8,14,9,20)],
  jcolors('pal4')
)


#### ---------- Order by metric type ----------------------------------####### 
# (the pattern list below must be adjusted by hand if new metrics are added)
res_tab$metric <- as.factor(res_tab$metric)
res_tab$metric <- recode(res_tab$metric, graph_connectivity = "graph", kbet = "kBet")

# Positions of the factor levels belonging to each metric family.
cms_ind <- grep("cms", levels(res_tab$metric))
lisi_ind <- grep("isi", levels(res_tab$metric))
ent_ind <- grep("entropy", levels(res_tab$metric))
mm_ind <- grep("mm", levels(res_tab$metric))
asw_ind <- grep("sw", levels(res_tab$metric))
kbet_ind <- grep("kbet", levels(res_tab$metric), ignore.case = TRUE)
graph_ind <- grep("graph", levels(res_tab$metric))
pcr_ind <- grep("pcr", levels(res_tab$metric))

# A level matching two patterns must appear only once (duplicated levels make
# factor() fail), and a level matching none must not be lost (it would turn
# into NA when the factor is re-levelled), so unmatched levels go last.
ordered_ind <- unique(c(cms_ind, lisi_ind, ent_ind, mm_ind,
                        kbet_ind, asw_ind, graph_ind, pcr_ind))
unmatched_ind <- setdiff(seq_along(levels(res_tab$metric)), ordered_ind)
if (length(unmatched_ind) > 0) {
  warning("Metrics without an ordering rule were appended at the end: ",
          paste(levels(res_tab$metric)[unmatched_ind], collapse = ", "),
          call. = FALSE)
}
metric_order <- levels(res_tab$metric)[c(ordered_ind, unmatched_ind)]

# Name the colours after the metric levels (in the factor's level order).
names(cols) <- levels(res_tab$metric)
####--------------------------------------------------------------------########

## Visualize time and memory results

# Change units to match the axis labels used below:
# CPU_time [h], RSS [GB], cells in thousands.
# NOTE(review): assumes the raw values are seconds, kB and single cells - confirm.
res_tab$CPU_time <- res_tab$CPU_time/(60*60)
res_tab$RSS <- res_tab$RSS/1000000
res_tab$cells <- res_tab$cells/1000

# Copy of the metric column that keeps the current level order.
res_tab <- res_tab %>% mutate(metric2 = as.factor(metric))

# One colour per metric level, repeated once per distinct cell count.
n_cell_levels <- nlevels(as.factor(res_tab$cells))
cols_rep <- rep(cols[seq_len(nlevels(res_tab$metric2))],
                each = n_cell_levels)

# Drop two entries at the position derived from "asw" in metric_order.
# NOTE(review): presumably because "asw" has two fewer measurements - confirm.
# Guarded: without an "asw" metric n_t is empty, and indexing with an empty
# negative index would wipe the whole vector instead of leaving it untouched.
n_t <- which(metric_order %in% "asw") * n_cell_levels
if (length(n_t) > 0) {
  cols_rep <- cols_rep[-c(n_t, n_t - 1)]
}

res_tab$metric <- factor(res_tab$metric, levels = metric_order)
res_tab$genes <- as.factor(res_tab$genes)

# Faceted trend plot of `res_tab`: one panel per metric.
#
# com1, com2: names (strings) of the `res_tab` columns for the x and y axis.
# Returns a ggplot object; the legend is hidden.
#
# Each panel shows its own metric in colour (one line type per `genes` level)
# on top of grey lines for all metrics.
# Colours come from the named global `cols` via scale_color_manual(), the same
# way the overview plots p4-p6 do. The previous positional colour vector
# overrode the metric mapping and had to match the row count of `res_tab`.
sep_trends <- function(com1, com2){
  # Background data: rows of the largest `genes` value only. The `metric`
  # column is removed so that facet_wrap(~metric) repeats these lines in
  # every panel; `metric2` still separates the individual lines.
  background <- res_tab %>%
    dplyr::select(-metric) %>%
    dplyr::filter(as.numeric(as.character(genes)) ==
                    max(as.numeric(as.character(genes))))

  # `.data[[...]]` replaces the deprecated aes_string(); labs() restores the
  # plain column names as default axis titles.
  # `size` (not `linewidth`) is kept for the ggplot2 3.3.0 recorded below.
  ggplot(res_tab, aes(x = .data[[com1]], y = .data[[com2]])) +
    geom_line(data = background,
              aes(group = metric2),
              color = "grey", size = 0.5, alpha = 0.5) +
    geom_line(aes(color = metric, linetype = genes), size = 1.2) +
    scale_color_manual(values = cols) +
    theme_ipsum(base_family = 'Helvetica') +
    theme(
      legend.position = "none",
      plot.title = element_text(size = 14),
      axis.text.x = element_text(size = 12),
      axis.text.y = element_text(size = 12)
    ) +
    labs(x = com1, y = com2) +
    ggtitle(paste0(com1, " - ", com2)) +
    facet_wrap(~metric)
}


# Per-metric panels: memory vs. cells, time vs. cells, time vs. memory.
p1 <- sep_trends("cells", "RSS") +
  labs(x = "# cells (thousands)", y = "RSS [GB]")

p2 <- sep_trends("cells", "CPU_time") +
  scale_y_log10() +
  labs(x = "# cells (thousands)", y = "CPU_time [h]")

p3 <- sep_trends("RSS", "CPU_time") +
  scale_y_log10() +
  labs(x = "RSS [GB]", y = "CPU_time [h]")

# All metrics in one panel: CPU time against number of cells (log10 y axis).
p4 <- ggplot(res_tab, aes(x = cells, y = CPU_time)) +
  geom_line(aes(color = metric, linetype = genes)) +
  geom_point(aes(color = metric, shape = genes), size = 3) +
  scale_color_manual(values = cols) +
  scale_y_log10() +
  labs(title = "CPU time", x = "# cells (thousands)", y = "CPU_time [h]") +
  theme_ipsum(base_family = 'Helvetica')

# All metrics in one panel: memory against number of cells.
p5 <- ggplot(res_tab, aes(x = cells, y = RSS)) +
  geom_line(aes(color = metric, linetype = genes)) +
  geom_point(aes(color = metric, shape = genes), size = 3) +
  scale_color_manual(values = cols) +
  labs(title = "RSS", x = "# cells (thousands)", y = "RSS [GB]") +
  theme_ipsum(base_family = 'Helvetica')


# res_tab$genes <- factor(res_tab$genes, levels = c("8331", "23381"), 
#                   labels = c("dataset1", "dataset2"))

# Facet titles, keyed by the `genes` level they replace.
gene.labs <- c(
  "8331" = "dataset1 (max. 68K cells)",
  "23381" = "dataset2 (max. 80K cells)"
)

# CPU time against memory, one facet per `genes` level with free x axes.
p6 <- ggplot(res_tab, aes(x = RSS, y = CPU_time)) +
  geom_line(aes(color = metric, linetype = genes)) +
  geom_point(aes(color = metric), size = 4) +
  scale_color_manual(values = cols) +
  scale_y_log10() +
  facet_wrap(~genes, ncol = 2, scales = "free_x",
             labeller = labeller(genes = gene.labs)) +
  labs(title = "CPU vs. RSS", x = "RSS [GB]", y = "CPU_time [h]") +
  theme_ipsum(base_family = 'Helvetica')



# Display the plots. Each bare name relies on top-level auto-printing.
# RSS vs. cells, one panel per metric
p1

# CPU time vs. cells, one panel per metric
p2

# CPU time vs. RSS, one panel per metric
p3

# CPU time vs. cells, all metrics in one panel
p4

# RSS vs. cells, all metrics in one panel
p5

# CPU time vs. RSS, one facet per `genes` level
p6

# Write every plot object below `out_path`; list names are the file names.
plot_outputs <- list(
  "rss_cells.rds" = p1,
  "time_cells.rds" = p2,
  "rss_time.rds" = p3,
  "time_cells_all.rds" = p4,
  "rss_cells_all.rds" = p5,
  "rss_cpu_all.rds" = p6
)
for (out_name in names(plot_outputs)) {
  saveRDS(plot_outputs[[out_name]], paste0(out_path, out_name))
}

# The summary table is stored next to the input reports (`data_path`).
saveRDS(res_tab, paste0(data_path, "summary_time_mem.rds"))

# Record the package versions used for this run.
sessionInfo()
## R version 3.6.1 (2019-07-05)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 16.04.6 LTS
## 
## Matrix products: default
## BLAS:   /home/aluetg/R/lib/R/lib/libRblas.so
## LAPACK: /home/aluetg/R/lib/R/lib/libRlapack.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices datasets  utils     methods   base     
## 
## other attached packages:
## [1] magrittr_1.5     ggplot2_3.3.0    hrbrthemes_0.8.0 jcolors_0.0.4   
## [5] dplyr_0.8.5      tidyr_1.0.2      purrr_0.3.3     
## 
## loaded via a namespace (and not attached):
##  [1] Rcpp_1.0.3        pillar_1.4.3      compiler_3.6.1    tools_3.6.1      
##  [5] extrafont_0.17    digest_0.6.25     evaluate_0.14     lifecycle_0.2.0  
##  [9] tibble_2.1.3      gtable_0.3.0      pkgconfig_2.0.3   rlang_0.4.5      
## [13] yaml_2.2.1        xfun_0.12         Rttf2pt1_1.3.8    withr_2.1.2      
## [17] stringr_1.4.0     knitr_1.28        gdtools_0.2.2     vctrs_0.2.3      
## [21] systemfonts_0.2.2 grid_3.6.1        tidyselect_1.0.0  glue_1.3.1       
## [25] R6_2.4.1          rmarkdown_2.1     farver_2.0.3      extrafontdb_1.0  
## [29] scales_1.1.0      htmltools_0.4.0   assertthat_0.2.1  colorspace_1.4-1 
## [33] renv_0.9.3-44     labeling_0.3      stringi_1.4.6     munsell_0.5.0    
## [37] crayon_1.3.4