library(scran)
library(dplyr)
library(tidyr)
library(ggplot2)
library(scuttle)
library(DESeq2)
library(edgeR)
library(VennDiagram)
library(clustifyr)
library(RColorBrewer)
library(ComplexHeatmap)
library(scater)
library(patchwork)
library(SingleCellExperiment)
library(stringr)
library(gridExtra)valve subgroups
Valve subgroups
Explore expression pattern of valve sub types
Preamble
Data
Single cell data
sce <- readRDS(file.path("..", "data", "sce_all_metadata_genes.rds"))
sce_val <- sce[,sce$logcounts.scaled.pca.harmony.neighbors_connectivities.leiden_1res %in% c("8", "9")]
sce_val$val_cluster <- sce_val$logcounts.scaled.pca.harmony.neighbors_connectivities.leiden_1res |>
droplevels()
levels(sce_val$val_cluster) <- c("valve1", "valve2")Cluster plot
plotReducedDim(sce, dimred="umap",
colour_by="logcounts.scaled.pca.harmony.neighbors_connectivities.leiden_1res",
point_size = 0.8)
Pseudobulk valve1 vs valve2
pb_val <- aggregateAcrossCells(sce_val,
id=colData(sce_val)[,c("val_cluster", "donor")],
use.assay.type = "counts")
pb_val <- pb_val[,pb_val$ncells >= 10]
print(table(pb_val$val_cluster))
valve1 valve2
7 7
# de
design <- model.matrix(~ donor + val_cluster, as.data.frame(colData(pb_val)))
dgl <- DGEList(counts(pb_val))
dgl <- calcNormFactors(dgl)
dgl <- estimateDisp(dgl, design)
fit <- glmQLFit(dgl, design)
de <- glmQLFTest(fit, coef="val_clustervalve2")
tt <- topTags(de, n = Inf)$table
tt$full_gene_name <- rownames(tt)
tt$gene_symbol <- gsub("^.*\\.","",rownames(tt))
saveRDS(tt, file.path("..", "out", "de", "valve2_vs_1.rds"))
write.csv(tt, file.path("..", "out", "de", "valve2_vs_1.csv"))Bulk expression data
fc_files <- list.files("../data/bulk-data-featurecounts", pattern="*.txt", full.names=TRUE)
fc_name <- list.files("../data/bulk-data-featurecounts", pattern="*.txt", full.names=FALSE)
fc_name <- gsub(".txt", "", fc_name)
fc_list <- lapply(fc_files, read.table, header = TRUE)
names(fc_list) <- fc_name
fc_list <- lapply(names(fc_list), function(fc_nam){
fc <- fc_list[[fc_nam]]
fc[,fc_nam] <- fc$matchCounts
fc <- fc |> select(-matchCounts)
fc
})
fc_tab <- fc_list |> purrr::reduce(full_join, by = "Identifier")
rownames(fc_tab) <- fc_tab$Identifier
fc_tab <- fc_tab |> select(-Identifier)
meta <- data_frame("cond" = gsub(".*dyn_", "", colnames(fc_tab)),
"sample" = gsub("_[0-9]{1,2}dyn.*", "", colnames(fc_tab)),
"strength" = gsub("^.*_[0-9]_|_[a-z].*$", "", colnames(fc_tab)))Warning: `data_frame()` was deprecated in tibble 1.1.0.
ℹ Please use `tibble()` instead.
rownames(meta) <- colnames(fc_tab)Warning: Setting row names on a tibble is deprecated.
# to get rowData
bulk_expr <- read.csv(file.path("..", "data", "4Donors_FPKM_Laminar_4-over-Oscillatory_4.csv"))
bulk_expr <- bulk_expr[-1,]
count_nam <- grep(".*FPKM",colnames(bulk_expr), value = T)
row_dat_nam <- colnames(bulk_expr)[!colnames(bulk_expr) %in% c("X", "Cluster", count_nam)]
row_dat <- bulk_expr[,c("gene_id", "gene_name")]
rownames(row_dat) <- row_dat$gene_id
row_dat <- row_dat[rownames(fc_tab),]
# Generate summarized experiment
bulk <- SummarizedExperiment(assays=list(counts=as.matrix(fc_tab)),
colData=meta, rowData = row_dat)
dim(bulk)[1] 21493 24
rownames(bulk) <- paste0(rowData(bulk)$gene_id, ".", rowData(bulk)$gene_name)
#Filter genes
bulk <- bulk[rowSums(assay(bulk, "counts")) > 5, ]
dim(bulk)[1] 16779 24
#Filter sample
bulk <- bulk[, !bulk$sample %in% "440z009_2"]
bulk <- bulk[, !bulk$strength %in% "04dyn"]
#DESEQ2 dataset
dds <- DESeq2::DESeqDataSet(bulk, design = ~ sample + cond)Warning in DESeq2::DESeqDataSet(bulk, design = ~sample + cond): some variables
in design formula are characters, converting to factors
dds <- estimateSizeFactors(dds)
vsd <- DESeq2::vst(dds, blind = TRUE)
#DE bulk
DE_bulk <- read.csv(file.path("..", "data","result--Laminar_4--over--Static_4.csv"))
dds <- DESeq(dds)using pre-existing size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing
cont <- c(0,0,0,1,1)
res_de <- results(dds, contrast = cont)
res_de <- res_de[order(res_de$padj), ]Correlation clustifyr
overlap <- intersect(rownames(sce_val), rownames(vsd))
sce_val <- sce_val[overlap,]
vsd <- vsd[overlap,]
tt_overlap <- tt[overlap,]
top_ordered <- tt_overlap[order(tt_overlap$FDR),]
top <- rownames(top_ordered)[1:100]
res <- clustify(
input = as.matrix(logcounts(sce_val)),
metadata = data.frame(colData(sce_val)),
cluster_col = "val_cluster",
ref_mat = assay(vsd),
query_genes = top
)using # of genes: 100
similarity computation completed, matrix of 2 x 9, preparing output
p <- plot_cor_heatmap(cor_mat = res, col = rev(hcl.colors(51, "Reds")))
p
Pb MDS
# | label: pb mds
#pseudo_bulk
pb_val <- aggregateAcrossCells(sce_val,
id=colData(sce_val)[,c("val_cluster", "donor")],
use.assay.type = "logcounts")
colnames(pb_val) <- paste0(pb_val$val_cluster, "_", pb_val$donor)
pb_val_sel <- pb_val[top,]
vsd_sel <- vsd[top,]
com <- cbind(assay(vsd_sel), logcounts(pb_val_sel))
d <- DGEList(com, remove.zeros = TRUE)
mds <- plotMDS.DGEList(d, top = 100)
dist_mat <- dist(t(com))
hclust_avg <- hclust(dist_mat, method = 'ward.D2')
plot(hclust_avg)
Heatmap top DE genes v1v2
top <- rownames(top_ordered)[1:100]
top <- top[!top %in% "ENSG00000139329.LUM"]
vsd_sel <- vsd[top,]
cd <- data.frame("cond" = vsd_sel$cond)
rownames(cd) <- colnames(vsd_sel)
hm <- pheatmap(assay(vsd_sel),
main = "Top DE genes valve2 vs valve1", fontsize = 6,
col = rev(hcl.colors(51, "RdBu")),
scale = "row",
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_col = cd)
hm
Top DE genes bulk
Heatmap
res_sel <- res_de[which(res_de$padj < 0.01),]
res_down <- res_sel[order(res_sel$stat),]
res_up <- res_sel[order(res_sel$stat, decreasing = T),]
top_bulk <- c(rownames(res_up)[1:200], rownames(res_down)[1:200])
top_bulk <- top_bulk[top_bulk %in% rownames(pb_val)]
sub <- logcounts(pb_val)[top_bulk,]
sub <- sub[rowSums(sub) > 0,]
cd <- data.frame("cond" = pb_val$val_cluster,
"donor" = pb_val$donor)
rownames(cd) <- colnames(pb_val)
# Specify colors
ann_colors = list(
cond = c("valve1" = "#849db1", "valve2" = "#ef6f6a"),
donor = c("1.0" = "#4E79A7", "2.0" = "#F28E2B",
"3.0" = "#E15759", "4.0" = "#76B7B2",
"5.0" = "#59A14F", "6.0" = "#EDC948",
"7.0" = "#B07AA1")
)
hm <- pheatmap(as.matrix(sub),
main = "Top flow associated DE genes", fontsize = 6,
col = rev(hcl.colors(50, "RdBu")),
scale = "row",
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = TRUE,
cluster_rows = TRUE,
annotation_colors = ann_colors,
show_row_dend = FALSE,
annotation_col = cd)
hm
Heatmap patient subgroups
pb_val1 <- pb_val[,pb_val$donor %in% c("6.0", "5.0", "7.0")]
sub1 <- logcounts(pb_val1)[top_bulk,]
sub1 <- sub1[rowSums(sub1) > 0,]
cd <- data.frame("cond" = pb_val1$val_cluster,
"donor" = pb_val1$donor)
rownames(cd) <- colnames(pb_val1)
hm <- pheatmap(as.matrix(sub1),
main = "Top flow associated DE genes", fontsize = 6,
col = rev(hcl.colors(50, "RdBu")),
scale = "row",
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_colors = ann_colors,
show_row_dend = FALSE,
annotation_col = cd)
hm
pb_val2 <- pb_val[,pb_val$donor %in% c("2.0", "4.0")]
sub2 <- logcounts(pb_val2)[top_bulk,]
sub2 <- sub2[rowSums(sub2) > 0,]
cd <- data.frame("cond" = pb_val2$val_cluster,
"donor" = pb_val2$donor)
rownames(cd) <- colnames(pb_val2)
hm <- pheatmap(as.matrix(sub2),
main = "Top flow associated DE genes", fontsize = 6,
col = rev(hcl.colors(50, "RdBu")),
scale = "row",
show_colnames = FALSE,
show_rownames = FALSE,
cluster_cols = TRUE,
annotation_colors = ann_colors,
show_row_dend = FALSE,
annotation_col = cd)
hm
Gene overlap
bulk <- res_de[overlap,]
bulk <- bulk[which(bulk$padj < 0.01),]
sc <- tt_overlap[tt_overlap$FDR < 0.01,]
myCol <- brewer.pal(3, "Pastel2")
# Chart
venn.diagram(
x = list(bulk = rownames(bulk), sc = rownames(sc)),
category.names = c("Flow" , "valve1/valve2"),
filename = '../out/plots/venn_bulk_sc.png',
output=T,
# Output features
imagetype="png" ,
height = 480 ,
width = 480 ,
resolution = 300,
compression = "lzw",
# Circles
lwd = 2,
lty = 'blank',
fill = myCol[1:2],
# Numbers
cex = .6,
fontface = "bold",
fontfamily = "sans",
# Set names
cat.cex = 0.6,
cat.fontface = "bold",
cat.default.pos = "outer",
cat.pos = c(-15, 5),
cat.fontfamily = "sans",
#rotation = 1
)[1] 1
PCA
sce_val <- runPCA(sce_val, subset_row = top_bulk)
plotReducedDim(sce_val, dimred="PCA", colour_by="val_cluster")
pb_val <- runPCA(pb_val, subset_row = top_bulk)Warning in check_numbers(k = k, nu = nu, nv = nv, limit = min(dim(x)) - : more
singular values/vectors requested than available
Warning in (function (A, nv = 5, nu = nv, maxit = 1000, work = nv + 7, reorth =
TRUE, : You're computing too large a percentage of total singular values, use a
standard svd instead.
plotReducedDim(pb_val, dimred="PCA", colour_by="val_cluster")
Dimplots
gene_list <- c("ALCAM","CD24","ITGA9","CD9","CAV1","SNCG","NEO1","ADAMTS6",
"ADAMTS1","SEMA3D","GJA1","CLDN11","ANGPT2","CLDN5","ADM",
"GJA4","SAT1","ID3","SCG3","NR2F1","APOD")
rowData(sce_val)$gene_short <- rowData(sce_val)$Symbol
dup <- which(duplicated(rowData(sce_val)$gene_short))
rowData(sce_val)$gene_short[dup] <- rownames(sce_val)[dup]
rownames(sce_val) <- rowData(sce_val)$gene_short
dim_list <- lapply(gene_list, function(gene_nam){
p <- plotReducedDim(sce_val, dimred="umap", colour_by=gene_nam,
point_size = 0.8) +
ggtitle(gene_nam)
})
wrap_plots(dim_list, ncol = 3) 
Violinplots
plotExpression(sce_val, features=gene_list,
x="val_cluster", colour_by="val_cluster", ncol = 3)