Package and data

Packages

library(raster)
library(plyr)
library(ggpubr)
library(spatstat)
library(ape)
library(viridis)
library(ggthemes)
source("../R/image_analysis_functions.R")

MIBI-TOF data

mibi.sce <- readRDS("data/mibi.sce_withDR.rds")
colnames(mibi.sce) <- paste(mibi.sce$SampleID, mibi.sce$cellLabelInImage, sep = "_")

mibi.sce$cellTypes <- ifelse(as.character(mibi.sce$immune_group) != "not immune",
                             as.character(mibi.sce$immune_group),
                             as.character(mibi.sce$tumor_group))

mibi.sce$cellTypes_group <- ifelse(as.character(mibi.sce$immune_group) != "not immune",
                                   "Micro-environment",
                                   "Tumour")


selected_chanel_mibi <- rownames(mibi.sce)[rowData(mibi.sce)$is_protein == 1]
interested_protein <- rownames(mibi.sce)[rowData(mibi.sce)$is_protein == 1]
interested_protein <- interested_protein[!interested_protein %in% c("OX40")]
interested_protein <- c("SMA", interested_protein)


# tiff_name_list <- list.files("../../sc-targeted-proteomics/data/TNBC_shareCellData/", pattern = ".tiff")
# tiff_name_list <- tiff_name_list[-24]
cellTypes_group_mibi_color <- tableau_color_pal("Tableau 10")(length(unique(mibi.sce$cellTypes_group)))
cellTypes_group_mibi_color <- c(cellTypes_group_mibi_color, "black")
names(cellTypes_group_mibi_color) <- c(unique(mibi.sce$cellTypes_group)[c(2,1)],
                                       "Background")

cellTypes_mibi_color <- tableau_color_pal("Classic 20")(length(unique(mibi.sce$cellTypes)))
cellTypes_mibi_color <- c(cellTypes_mibi_color, "black")
names(cellTypes_mibi_color) <- c(unique(mibi.sce$cellTypes), "Background")

Visualisation of the image

# example
str_name <- "data/p34_labeledcellData.tiff"


sample_id <- as.numeric(gsub("data/p", "", gsub("_labeledcellData.tiff", "", str_name)))

r <- raster(str_name)
# create an rater object
str_name <- "data/p34_labeledcellData.tiff"


sample_id <- as.numeric(gsub("data/p", "", gsub("_labeledcellData.tiff", "", str_name)))

r <- readRDS("data/p34_labeledcellData_RasterLayer.rds")

p_sce <- mibi.sce[, mibi.sce$SampleID == sample_id]
p_sce <- p_sce[interested_protein, ]
exprsMat <- assay(p_sce, "mibi_exprs")


group_raster_values <- mapValueToCoord(r,
                                       p_sce$cellLabelInImage,
                                       p_sce$cellTypes_group)


cellTypes_raster_values <- mapValueToCoord(r,
                                           p_sce$cellLabelInImage,
                                           p_sce$cellTypes)






ddf <- rasterToPoints(r)
ddf <- data.frame(ddf)
colnames(ddf) <- c("X", "Y", "value")
ddf$cellType_group <- group_raster_values
ddf$cellType <- cellTypes_raster_values

g_cellGroup <- ggplot(NULL) +
  geom_raster(data = ddf, aes(X, Y, fill = as.factor(cellType_group))) +
  theme_minimal() +
  scale_fill_manual(values = cellTypes_group_mibi_color) +
  coord_quickmap() +
  theme(aspect.ratio = 1, legend.position = "right") +
  labs(fill = "Cell Type")

g_cellTypes <- ggplot(NULL) +
  geom_raster(data = ddf, aes(X, Y, fill = as.factor(cellType))) +
  theme_minimal() +
  scale_fill_manual(values = cellTypes_mibi_color) +
  coord_quickmap() +
  theme(aspect.ratio = 1, legend.position = "right") +
  labs(fill = "Cell Type")

ggarrange(g_cellGroup, g_cellTypes, ncol = 2, nrow = 1, align = "hv")

Spatial features

Create ppp object

coord_r <- rasterToPoints(r)
center_r_x <- aggregate(coord_r[, 1], list(coord_r[, 3]), median)
group <- center_r_x$Group.1
center_r_x <- center_r_x$x
center_r_y <- aggregate(coord_r[, 2], list(coord_r[, 3]), median)$x
center_r <- data.frame(x = center_r_x, y = center_r_y, group = group)
cell_label <- raster::values(r)
notInLabelnotInLabel <- unique(cell_label[!cell_label %in% p_sce$cellLabelInImage])


r_cellType <- mapvalues((center_r$group),
                        from = notInLabelnotInLabel,
                        to = rep(0, length(notInLabelnotInLabel)))

r_cellType <- mapvalues((r_cellType),
                        from = p_sce$cellLabelInImage,
                        to = p_sce$cellTypes)

center_r$cellTypes <- r_cellType

center_r <- center_r[center_r$cellTypes != "0", ]



keep <- center_r$cellTypes != "Unidentified"

cell_points <- ppp(x = center_r[keep, 1],
                   y = center_r[keep, 2],
                   check = FALSE,
                   yrange = c(0, 2048), xrange = c(0, 2048),
                   marks = as.factor(center_r[keep, ]$cellTypes))


cell_points_cts <- ppp(x = center_r[, 1], y = center_r[, 2],
                       check = FALSE,
                       yrange = c(0, round(max(coord_r[, 2]))),
                       xrange = c(0, round(max(coord_r[, 1]))),
                       marks = t(exprsMat))

Moran’s I

d <- pairdist(cell_points_cts, squared = FALSE)
diag(d) <- Inf

print("Calculate moran'stats")
w <- 1/d

moran_cor <- list()
for (i in 1:nrow(exprsMat)) {
  cat(i, "...")
  moran_cor[[i]] <- try(ape::Moran.I(exprsMat[i, ], w)$observed, silent = TRUE)
  if (is(moran_cor[[i]], "try-error")) {
    moran_cor[[i]] <- NA
  }
}
names(moran_cor) <- rownames(exprsMat)
moran_cor <- unlist(moran_cor)
sort(moran_cor)
exprsMat_in_raster <- apply(exprsMat, 1, function(x) {
  mapValueToCoord(r,
                  p_sce$cellLabelInImage,
                  x,
                  cont = TRUE)
})


g_MPO <- ggplot(NULL) +
  geom_raster(data = ddf, aes(X, Y,
                              fill = scale(exprsMat_in_raster[, "MPO"]))) +
  theme_minimal() +
  scale_fill_gradientn(colours = viridis::viridis(120)[c(seq(1, 40, 2), 41:120)]) +
  coord_quickmap() +
  theme(aspect.ratio = 1, legend.position = "right") +
  labs(fill = "MPO")


g_CD8 <- ggplot(NULL) +
  geom_raster(data = ddf, aes(X, Y,
                              fill = scale(exprsMat_in_raster[, "CD8"]))) +
  theme_minimal() +
  scale_fill_gradientn(colours = viridis::viridis(120)[c(seq(1, 40, 2), 41:120)]) +
  coord_quickmap() +
  theme(aspect.ratio = 1, legend.position = "right") +
  labs(fill = "CD8")

ggarrange(g_MPO, g_CD8, align = "hv", ncol = 2)

Nearest Neighbour Correlation

nncorr_protein <- nncorr(cell_points_cts)["correlation", ]
nncorr_protein
nn_which_res <- nnwhich(cell_points_cts)

dtp <- data.frame(MPO = marks(cell_points_cts)[, "MPO"],
                  MPO_NN_k1 = marks(cell_points_cts)[nn_which_res, "MPO"],
                  CD8 = marks(cell_points_cts)[, "CD8"],
                  CD8_NN_k1 = marks(cell_points_cts)[nn_which_res, "CD8"])

g1 <- ggplot(dtp, aes(x = MPO, y = MPO_NN_k1)) +
  geom_point(alpha = 0.5, size = 2) +
  theme_bw() +
  scale_color_viridis_c() +
  theme(aspect.ratio = 1, legend.position = "right",
        panel.grid = element_blank(),
        text = element_text(size = 14)) +
  ylab("MPO (1NN)")

g2 <- ggplot(dtp, aes(x = CD8, y = CD8_NN_k1)) +
  geom_point(alpha = 0.5, size = 2) +
  theme_bw() +
  scale_color_viridis_c() +
  theme(aspect.ratio = 1, legend.position = "right",
        panel.grid = element_blank(),
        text = element_text(size = 14)) +
  ylab("CD8 (1NN)")

ggarrange(g1, g2, ncol = 2, nrow = 1, align = "hv")

Cell type interaction composition

tab <- table(center_r[keep, ]$cellTypes)
cellTypes_toTest <- names(tab[which(tab > 10)])
cellTypes_pair <- expand.grid(cellTypes_toTest, cellTypes_toTest,
                              stringsAsFactors = FALSE)


print("Calculate NN info")

# Calcualte the pairwise distance
d <- pairdist(cell_points, squared = FALSE)
diag(d) <- Inf

nn_list <- apply(d, 1, function(x) which(x < 50))

nn_list_cellTypes <- lapply(seq_along(nn_list), function(idx) {
  if (length(nn_list[[idx]]) > 0) {
    paste(center_r[keep, ]$cellTypes[idx],
          center_r[keep, ]$cellTypes[nn_list[[idx]]], sep = "_")
  }
})

nn_list_cellTypes <- unlist(nn_list_cellTypes)
nn_list_cellTypes <- rearrange_string(nn_list_cellTypes)
nn_list_cellTypes <- table(nn_list_cellTypes)
cat("Top 10 cell type interaction")
sort(nn_list_cellTypes/sum(nn_list_cellTypes), decreasing = TRUE)[1:10]

L function

print("Calculate L'stats (cellType1)")

L_patient <- list()
for (i in 1:nrow(cellTypes_pair)) {
  # cat(cellTypes_pair[i,], "...")
  L_patient[[i]] <- L_stats(cell_points,
                            from = cellTypes_pair[i, 1],
                            to = cellTypes_pair[i, 2],
                            L_dist = 50)
}

L_patient <- do.call(c, L_patient)
names(L_patient) <- paste(cellTypes_pair[, 1], cellTypes_pair[, 2], sep = "_")
L <- spatstat::Lcross(cell_points,
                      from = "Neutrophils",
                      to = "Neutrophils",
                      verbose = FALSE,
                      correction = "best")

L_env <- envelope(cell_points, Lcross,
                  from = "Neutrophils",
                  to = "Neutrophils")
plot(L_env, xlim = c(0, 200), legend = FALSE)



L <- spatstat::Lcross(cell_points,
                      from = "Neutrophils",
                      to = "Keratin-positive tumor",
                      verbose = FALSE,
                      correction = "best")

L_env <- envelope(cell_points, Lcross,
                  from = "Neutrophils",
                  to = "Keratin-positive tumor")

plot(L_env, xlim = c(0, 200), legend = FALSE)
# The following for loop will generate the spatial features for all images 
# in MIBI-TOF data

spat_stats_list_all <- list()


for (s in 1:length(tiff_name_list)) {

  str_name <- paste("../../data/TNBC_shareCellData/", tiff_name_list[s], sep = "")

  sample_id <- as.numeric(gsub("p", "", gsub("_labeledcellData.tiff", "", tiff_name_list[s])))

  print(str_name)
  print(sample_id)
  r <- raster(str_name)
  r

  p_sce <- mibi.sce[, mibi.sce$SampleID == sample_id]
  p_sce <- p_sce[interested_protein, ]
  exprsMat <- assay(p_sce, "mibi_exprs")



  print(table(p_sce$immune_group))


  group_raster_values <- mapValueToCoord(r,
                                         p_sce$cellLabelInImage,
                                         p_sce$cellTypes_group)


  cellTypes_raster_values <- mapValueToCoord(r,
                                             p_sce$cellLabelInImage,
                                             p_sce$cellTypes)






  ddf <- rasterToPoints(r)
  ddf <- data.frame(ddf)
  colnames(ddf) <- c("X", "Y", "value")
  ddf$cellType_group <- group_raster_values
  ddf$cellType <- cellTypes_raster_values




  coord_r <- rasterToPoints(r)
  center_r_x <- aggregate(coord_r[, 1], list(coord_r[, 3]), median)
  group <- center_r_x$Group.1
  center_r_x <- center_r_x$x
  center_r_y <- aggregate(coord_r[, 2], list(coord_r[, 3]), median)$x

  center_r <- data.frame(x = center_r_x, y = center_r_y, group = group)

  cell_label <- raster::values(r)
  notInLabelnotInLabel <- unique(cell_label[!cell_label %in% p_sce$cellLabelInImage])




  r_cellType_group <- mapvalues((center_r$group),
                                from = notInLabelnotInLabel,
                                to = rep(0, length(notInLabelnotInLabel)))

  r_cellType_group <- mapvalues((r_cellType_group),
                                from = p_sce$cellLabelInImage,
                                to = p_sce$cellTypes_group)

  center_r$cellTypes <- r_cellType_group

  r_cellType <- mapvalues((center_r$group),
                          from = notInLabelnotInLabel,
                          to = rep(0, length(notInLabelnotInLabel)))

  r_cellType <- mapvalues((r_cellType),
                          from = p_sce$cellLabelInImage,
                          to = p_sce$cellTypes)

  center_r$cellTypes2 <- r_cellType



  center_r <- center_r[center_r$cellTypes != "0", ]


  print("For oringinal label")

  keep <- center_r$cellTypes2 != "Unidentified"

  cell_points <- ppp(x = center_r[keep, 1],
                     y = center_r[keep, 2],
                     check = FALSE,
                     yrange = c(0, 2048), xrange = c(0, 2048),
                     marks = as.factor(center_r[keep, ]$cellTypes2))

  tab <- table(center_r[keep, ]$cellTypes2)
  cellTypes_toTest <- names(tab[which(tab > 10)])
  cellTypes_pair <- expand.grid(cellTypes_toTest, cellTypes_toTest,
                                stringsAsFactors = FALSE)


  print("Calculate NN info")

  # Calcualte the pairwise distance
  d <- pairdist(cell_points, squared = FALSE)
  diag(d) <- Inf

  nn_list <- apply(d, 1, function(x) which(x < 50))

  nn_list_cellTypes <- lapply(seq_along(nn_list), function(idx) {
    if (length(nn_list[[idx]]) > 0) {
      paste(center_r[keep, ]$cellTypes2[idx],
            center_r[keep, ]$cellTypes2[nn_list[[idx]]], sep = "_")
    }
  })

  nn_list_cellTypes <- unlist(nn_list_cellTypes)
  nn_list_cellTypes <- rearrange_string(nn_list_cellTypes)
  nn_list_cellTypes <- table(nn_list_cellTypes)


  print("Calculate L'stats (cellType1)")

  L_patient <- list()
  for (i in 1:nrow(cellTypes_pair)) {
    # cat(cellTypes_pair[i,], "...")
    L_patient[[i]] <- L_stats(cell_points,
                              from = cellTypes_pair[i, 1],
                              to = cellTypes_pair[i, 2],
                              L_dist = 50)
  }

  L_patient <- do.call(c, L_patient)
  names(L_patient) <- paste(cellTypes_pair[, 1], cellTypes_pair[, 2], sep = "_")

  print("Calculate ANN")

  ann <- list()

  for (i in 1:length(tab)) {
    center_r_tmp <- center_r[keep, ][center_r$cellTypes2 == names(tab)[i], ]
    cell_points_tmp <- ppp(x = center_r_tmp[, 1],
                           y = center_r_tmp[, 2],
                           check = FALSE,
                           yrange = c(0, round(max(coord_r[, 2]))),
                           xrange = c(0, round(max(coord_r[, 1]))),
                           marks = as.factor(center_r_tmp$cellTypes2))

    ann[[i]] <- nndist(cell_points_tmp, k = 1:10)
  }

  names(ann) <- names(tab)








  cell_points_cts <- ppp(x = center_r[, 1], y = center_r[, 2], check = FALSE,
                         yrange = c(0, round(max(coord_r[, 2]))),
                         xrange = c(0, round(max(coord_r[, 1]))),
                         marks = t(exprsMat))
  nncorr_protein <- nncorr(cell_points_cts)["correlation", ]
  d <- pairdist(cell_points_cts, squared = FALSE)
  diag(d) <- Inf

  print("Calculate moran'stats")
  w <- 1/d

  moran_cor <- list()
  for (i in 1:nrow(exprsMat)) {
    cat(i, "...")
    moran_cor[[i]] <- try(ape::Moran.I(exprsMat[i, ], w)$observed, silent = TRUE)
    if (is(moran_cor[[i]], "try-error")) {
      moran_cor[[i]] <- NA
    }
  }
  names(moran_cor) <- rownames(exprsMat)
  moran_cor <- unlist(moran_cor)



  spat_stats_list_all[[s]] <- list(moran_cor = moran_cor,
                                   nncorr_protein = nncorr_protein,
                                   L_patient = L_patient,
                                   ann = ann,
                                   nn_list_cellTypes = nn_list_cellTypes)

}

sid_list <- c()
for (s in 1:length(tiff_name_list)) {

  str_name <- paste("../../data/TNBC_shareCellData/", tiff_name_list[s], sep = "")

  sample_id <- as.numeric(gsub("p", "", gsub("_labeledcellData.tiff", "", tiff_name_list[s])))
  sid_list <- append(sid_list, sample_id)
}

names(spat_stats_list_all) <- as.character(sid_list)
saveRDS(spat_stats_list_all, file = "output/mibi_spat_stats_list_all.rds")

Session Information

## R version 4.0.2 (2020-06-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-openmp/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] SingleCellExperiment_1.11.6 SummarizedExperiment_1.19.6
##  [3] DelayedArray_0.15.7         matrixStats_0.56.0         
##  [5] Matrix_1.2-18               Biobase_2.49.0             
##  [7] GenomicRanges_1.41.5        GenomeInfoDb_1.25.8        
##  [9] IRanges_2.23.10             S4Vectors_0.27.12          
## [11] BiocGenerics_0.35.4         ggthemes_4.2.0             
## [13] viridis_0.5.1               viridisLite_0.3.0          
## [15] ape_5.4                     spatstat_1.64-1            
## [17] rpart_4.1-15                nlme_3.1-148               
## [19] spatstat.data_1.4-3         ggpubr_0.4.0               
## [21] ggplot2_3.3.2               plyr_1.8.6                 
## [23] raster_3.3-13               sp_1.4-2                   
## 
## loaded via a namespace (and not attached):
##  [1] bitops_1.0-6           fs_1.5.0               rprojroot_1.3-2       
##  [4] tools_4.0.2            backports_1.1.8        R6_2.4.1              
##  [7] mgcv_1.8-31            colorspace_1.4-1       withr_2.2.0           
## [10] tidyselect_1.1.0       gridExtra_2.3          curl_4.3              
## [13] compiler_4.0.2         desc_1.2.0             scales_1.1.1          
## [16] pkgdown_1.5.1          goftest_1.2-2          stringr_1.4.0         
## [19] digest_0.6.25          foreign_0.8-80         spatstat.utils_1.17-0 
## [22] rmarkdown_2.3          rio_0.5.16             XVector_0.29.3        
## [25] pkgconfig_2.0.3        htmltools_0.5.0        rlang_0.4.7           
## [28] readxl_1.3.1           generics_0.0.2         dplyr_1.0.1           
## [31] zip_2.0.4              car_3.0-8              RCurl_1.98-1.2        
## [34] magrittr_1.5           GenomeInfoDbData_1.2.3 Rcpp_1.0.5            
## [37] munsell_0.5.0          abind_1.4-5            lifecycle_0.2.0       
## [40] stringi_1.4.6          yaml_2.2.1             carData_3.0-4         
## [43] zlibbioc_1.35.0        MASS_7.3-51.6          grid_4.0.2            
## [46] forcats_0.5.0          crayon_1.3.4           deldir_0.1-28         
## [49] lattice_0.20-41        haven_2.3.1            splines_4.0.2         
## [52] tensor_1.5             hms_0.5.3              knitr_1.29            
## [55] pillar_1.4.6           ggsignif_0.6.0         codetools_0.2-16      
## [58] glue_1.4.1             evaluate_0.14          data.table_1.13.0     
## [61] vctrs_0.3.2            cellranger_1.1.0       gtable_0.3.0          
## [64] purrr_0.3.4            polyclip_1.10-0        tidyr_1.1.1           
## [67] assertthat_0.2.1       xfun_0.16              openxlsx_4.1.5        
## [70] broom_0.7.0            rstatix_0.6.0          tibble_3.0.3          
## [73] memoise_1.1.0          ellipsis_0.3.1