77# ' @examples
88# ' \dontrun{sc_obj <- InferBatches(sc_obj)}
99# ' @export
10+ # ' @importFrom foreach %dopar%
1011InferBatches <- function (sc_obj , exit_with_code = FALSE , log_flag = FALSE ) {
1112 exit_code <- - 1
1213 sc_obj <- tryCatch(
@@ -27,23 +28,49 @@ InferBatches <- function(sc_obj, exit_with_code = FALSE, log_flag = FALSE) {
2728 print_SPEEDI(" Neighbors exist. Skipping construction of neighborhood graph..." , log_flag )
2829 }
2930 }
30- sc_obj <- find_clusters_SPEEDI(sc_obj , resolution = 0.3 , log_flag = log_flag )
31- if (length(unique(sc_obj $ seurat_clusters )) > 30 ) {
32- sc_obj <- find_clusters_SPEEDI(sc_obj , resolution = 0.2 , log_flag = log_flag )
33- if (length(unique(sc_obj $ seurat_clusters )) > 30 ) {
34- sc_obj <- find_clusters_SPEEDI(sc_obj , resolution = 0.1 , log_flag = log_flag )
35- }
31+
32+ # Set up processing of different resolutions so it's parallel (max = 7 cores)
33+ if (Sys.getenv(" SLURM_NTASKS_PER_NODE" ) == " " ) {
34+ n.cores <- as.numeric(parallel :: detectCores())
35+ } else {
36+ n.cores <- as.numeric(Sys.getenv(" SLURM_NTASKS_PER_NODE" ))
37+ }
38+
39+ if (n.cores > 7 ) {
40+ n.cores <- 7
41+ }
42+
43+ print_SPEEDI(paste0(" Number of cores: " , n.cores ), log_flag )
44+ doParallel :: registerDoParallel(n.cores )
45+ metrics_list <- foreach :: foreach(
46+ i = c(0.1 , 0.15 , 0.2 , 0.25 , 0.3 , 0.35 , 0.4 ),
47+ .combine = ' c' ,
48+ .packages = c(" Seurat" , " bluster" )
49+ ) %dopar % {
50+ tmp <- find_clusters_SPEEDI(sc_obj = sc_obj , resolution = i , method = " Louvain" , log_flag = log_flag )
51+ dims <- 1 : 30
52+ clusters <- tmp $ seurat_clusters
53+ sil.out <- bluster :: approxSilhouette(Seurat :: Embeddings(tmp @ reductions $ pca )[, dims ], clusters )
54+ sil.score <- mean(sil.out $ width )
55+ names(sil.score ) <- i
56+ return (sil.score )
3657 }
58+
59+ max.res <- as.numeric(as.character(names(which.max(metrics_list ))))
60+ print_SPEEDI(paste0(" Resolution = " , max.res ), log_flag = log_flag )
61+
62+ tmp <- find_clusters_SPEEDI(sc_obj = sc_obj , resolution = max.res , method = " Louvain" , log_flag = log_flag )
63+
3764 # Use LISI metric to guess batch labels
38- X <- sc_obj @ reductions $ umap @ cell.embeddings
39- meta_data <- data.frame (sc_obj $ sample )
65+ X <- tmp @ reductions $ umap @ cell.embeddings
66+ meta_data <- data.frame (tmp $ sample )
4067 colnames(meta_data ) <- " batch"
41- meta_data $ cluster <- sc_obj $ seurat_clusters
68+ meta_data $ cluster <- tmp $ seurat_clusters
4269 lisi.res <- data.frame (matrix (ncol = 4 , nrow = 0 ))
4370 colnames(lisi.res ) <- c(" batch" , " score" , " cluster" , " freq" )
44- clusters.interest <- names(table(sc_obj $ seurat_clusters ))[prop.table(table(sc_obj $ seurat_clusters )) > 0.01 ]
45- for (cluster in clusters.interest ) { # levels(sc_obj$seurat_clusters)) {
46- cells <- names(sc_obj $ seurat_clusters [sc_obj $ seurat_clusters == cluster ])
71+ clusters.interest <- names(table(tmp $ seurat_clusters ))[prop.table(table(tmp $ seurat_clusters )) > 0.01 ]
72+ for (cluster in clusters.interest ) {
73+ cells <- names(tmp $ seurat_clusters [tmp $ seurat_clusters == cluster ])
4774 X.sub <- X [which(rownames(X ) %in% cells ),]
4875 meta_data.sub <- meta_data [which(rownames(meta_data ) %in% cells ),]
4976 res <- lisi :: compute_lisi(X.sub , meta_data.sub , label_colnames = " batch" )
@@ -56,13 +83,19 @@ InferBatches <- function(sc_obj, exit_with_code = FALSE, log_flag = FALSE) {
5683 lisi.res <- rbind(lisi.res , agg.res )
5784 }
5885
86+ lisi.res <- lisi.res [lisi.res $ freq > 10 ,]
87+
5988 p.values <- list ()
6089 used.sample.dump <- c()
6190 batch.assign <- list ()
6291 for ( i in clusters.interest ) {
6392 lisi.res.sub <- lisi.res [lisi.res $ cluster == i ,]
64- lisi.res.sub $ batch_count <- as.numeric(as.character(table(sc_obj $ sample )[lisi.res.sub $ batch ]))
65- lisi.res.sub $ scaled.score <- (lisi.res.sub $ score / max(lisi.res.sub $ score )) * (lisi.res.sub $ freq / lisi.res.sub $ batch_count )
93+ lisi.res.sub $ batch_count <- as.numeric(as.character(table(tmp $ sample )[lisi.res.sub $ batch ]))
94+
95+ lisi.res.sub $ scaled.score <- mapply(
96+ geometric.mean ,
97+ (max(lisi.res.sub $ score ) / lisi.res.sub $ score ),
98+ (lisi.res.sub $ freq / lisi.res.sub $ batch_count ))
6699
67100 if (max(lisi.res.sub $ score ) < = 1.01 ) {
68101 lisi.res.sub <- lisi.res.sub [order(lisi.res.sub $ score , decreasing = TRUE ),]
@@ -78,30 +111,28 @@ InferBatches <- function(sc_obj, exit_with_code = FALSE, log_flag = FALSE) {
78111 if (dim(lisi.res.sub )[1 ] > 30 ) {
79112 lisi.res.sub <- lisi.res.sub [1 : 30 ,]
80113 }
114+
81115 lisi.res.sub $ diff.scaled.score <- abs(c(diff(lisi.res.sub $ scaled.score ), 0 ))
82116
83- if (dim(lisi.res.sub )[1 ] > = 3 ) {
117+ if (dim(lisi.res.sub )[1 ] > = 3 & sum( lisi.res.sub $ diff.scaled.score ) != 0 ) {
84118 p.values [[i ]] <- outliers :: dixon.test(lisi.res.sub $ diff.scaled.score )$ p.value [[1 ]]
85119 } else {
86120 p.values [[i ]] <- 1
87121 }
88122
89- if (p.values [[i ]] > 0.05 & dim(lisi.res.sub )[1 ] > = 3 ) {
90- lisi.res.sub <- lisi.res.sub [- which.max(lisi.res.sub $ diff.scaled.score ),]
91- p.values [[i ]] <- outliers :: dixon.test(lisi.res.sub $ diff.scaled.score )$ p.value [[1 ]]
92- }
93-
94123 if (p.values [[i ]] < 0.05 & dim(lisi.res.sub )[1 ] > = 3 ) {
95124 max.index <- which.max(lisi.res.sub $ diff.scaled.score )
96125 samples.of.batch <- lisi.res.sub $ batch [1 : max.index ]
97126
98127 if (any(samples.of.batch %in% used.sample.dump )) {
128+
99129 if (! all(samples.of.batch %in% used.sample.dump )) {
100130 used.index <- which(samples.of.batch %in% used.sample.dump )
101131 samples.of.batch <- samples.of.batch [- used.index ]
102132 if (length(samples.of.batch ) > 0 ) {
103133 batch.assign <- lappend(batch.assign , samples.of.batch )
104134 }
135+
105136 } else if (! list (samples.of.batch ) %in% batch.assign ) {
106137 if (length(samples.of.batch ) == 1 ) {
107138 batch.assign <- lappend(batch.assign , samples.of.batch )
@@ -118,25 +149,27 @@ InferBatches <- function(sc_obj, exit_with_code = FALSE, log_flag = FALSE) {
118149 }
119150 used.sample.dump <- union(used.sample.dump , samples.of.batch )
120151 }
152+
121153 }
122154 }
123155
124- batch <- as.factor(sc_obj $ sample )
156+ batch <- as.factor(tmp $ sample )
157+
125158 if (length(batch.assign ) > 0 ) {
126159 levels.batch <- levels(batch )
127160 for (i in 1 : length(batch.assign )) {
128161 levels.batch [which(levels(batch ) %in% batch.assign [[i ]])] <- i
129162 }
130163 levels.batch [! levels.batch %in% c(1 : length(batch.assign ))] <- length(batch.assign )+ 1
131164 levels(batch ) <- levels.batch
132- sc_obj $ batch <- as.character(batch )
165+ tmp $ batch <- as.character(batch )
133166 } else {
134167 print_SPEEDI(" No batch effect detected!" , log_flag )
135- sc_obj $ batch <- " No Batch"
168+ tmp $ batch <- " No Batch"
136169 }
137- print_SPEEDI(paste0(" Total batches detected: " , length(unique(sc_obj $ batch ))), log_flag )
170+ print_SPEEDI(paste0(" Total batches detected: " , length(unique(tmp $ batch ))), log_flag )
138171 print_SPEEDI(" Step 5: Complete" , log_flag )
139- return (sc_obj )
172+ return (tmp )
140173 },
141174 error = function (cond ) {
142175 if (exit_code == - 1 ) {
@@ -228,8 +261,7 @@ IntegrateByBatch_RNA <- function(sc_obj, exit_with_code = FALSE, log_flag = FALS
228261
229262 print_SPEEDI(" Beginning integration" , log_flag )
230263 integrated_obj <- Seurat :: IntegrateData(anchorset = anchors ,
231- normalization.method = " SCT" ,
232- k.weight = 100 )
264+ normalization.method = " SCT" )
233265 Seurat :: DefaultAssay(integrated_obj ) <- " integrated"
234266
235267 rm(sc_obj_list )
@@ -347,7 +379,7 @@ IntegrateByBatch_ATAC <- function(proj, output_dir = getwd(), exit_with_code = F
347379 obj <- Seurat :: CreateSeuratObject(tmp , project = ' scATAC' , min.cells = 0 , min.features = 0 )
348380 obj [[reducedDims_param ]] <- Seurat :: CreateDimReducObject(embeddings = tile_reduc , key = paste0(reducedDims_param , " _" ), assay = ' RNA' )
349381 obj <- Seurat :: FindNeighbors(obj , reduction = reducedDims_param , dims = 1 : 29 )
350- obj <- find_clusters_SPEEDI(obj , resolution = 2 , log_flag = log_flag )
382+ obj <- find_clusters_SPEEDI(obj , resolution = 2 , method = " Leiden " , log_flag = log_flag )
351383 obj <- Seurat :: RunUMAP(obj , reduction = reducedDims_param , dims = 1 : 29 , seed.use = get_speedi_seed())
352384 proj <- ArchR :: addCellColData(
353385 ArchRProj = proj ,
@@ -422,7 +454,7 @@ VisualizeIntegration <- function(sc_obj, output_dir = getwd(), exit_with_code =
422454 } else {
423455 print_SPEEDI(" Neighbors exist. Skipping constructing neighborhood graph..." , log_flag )
424456 }
425- sc_obj <- find_clusters_SPEEDI(sc_obj = sc_obj , resolution = 2 , log_flag = log_flag )
457+ sc_obj <- find_clusters_SPEEDI(sc_obj = sc_obj , resolution = 2 , method = " Leiden " , log_flag = log_flag )
426458 sample_count <- length(unique(sc_obj $ sample ))
427459 cell_count <- length(sc_obj $ cell_name )
428460 # Plot by cluster
0 commit comments