# Benchmark for SVG detections
## Evaluate SVG
**NOTE:** To install other packages such as SpatialDE/hotspot/SpaGFT, please refer to their official site.
:::{dropdown} SpaGFT
```python
import pandas as pd
import numpy as np
import scanpy as sc
import SpaGFT as spg
def process_h5ad_file(file):
spagft_svg(file)
def spagft_svg(file):
adata = sc.read_h5ad(file)
sc.pp.filter_genes(adata, min_cells=50)
name = os.path.basename(file)
pre = name.split('.')[0]
adata.obs.loc[:, ['array_row', 'array_col']] = adata.obsm['spatial']
(ratio_low, ratio_high) = spg.gft.determine_frequency_ratio(adata, ratio_neighbors=1)
df_res = spg.detect_svg(adata,
spatial_info=['array_row', 'array_col'],
ratio_low_freq=ratio_low,
ratio_high_freq=ratio_high,
ratio_neighbors=1,
filter_peaks=True,
S=6)
df_res = df_res.loc[adata.var_names]
df = df_res.sort_values(by='svg_rank', ascending=True)
df.to_csv('./benchmark/spagft_' + pre + '_50.csv')
if __name__ == '__main__':
current_dir = 'I://mm10'
for root, _, files in os.walk(current_dir):
for file in files:
if file.endswith('h5ad'):
h5ad_file_path = os.path.join(root, file)
# Process the H5AD file here
process_h5ad_file(h5ad_file_path)
```
:::
:::{dropdown} scGCO
```python
import pandas as pd
import numpy as np
import scanpy as sc
from scGCO import *
def process_h5ad_file(file):
adata = sc.read_h5ad(file)
name = os.path.basename(file)
pre = name.split('.')[0]
sc.pp.filter_genes(adata, min_cells=50)
data = pd.DataFrame(
adata.X.todense(), columns=adata.var_names, index=adata.obs_names
)
data_norm = normalize_count_cellranger(data)
exp = data.iloc[:, 0]
locs = adata.obsm['spatial'].copy()
cellGraph = create_graph_with_weight(locs, exp)
gmmDict= gmm_model(data_norm)
df_res = identify_spatial_genes(locs, data_norm, cellGraph,gmmDict)
df_res = df_res.loc[adata.var_names]
df = pd.concat([df_res, adata.var], axis=1)
df.to_csv('/data/home/pssun/scgco_' + pre + '_50.csv')
if __name__ == '__main__':
current_dir = '/data/home/pssun/mm10/'
for root, _, files in os.walk(current_dir):
for file in files:
if file.endswith('h5ad'):
h5ad_file_path = os.path.join(root, file)
# Process the H5AD file here
process_h5ad_file(h5ad_file_path)
```
:::
:::{dropdown} nnSVG
```R
library(nnSVG)
library(anndata)
library(SpatialExperiment)
library(scran)
reticulate::use_condaenv("base", required = TRUE)
csv_files <- list.files(path = '/data/home/st/', pattern = "\\.h5ad$", full.names = TRUE)
out_dir <- "/data/home/result/"
for (file in csv_files) {
file_name <- tools::file_path_sans_ext(basename(file))
print(file_name)
adata <- anndata::read_h5ad(file)
counts <- t(as.matrix(adata$X))
colnames(counts) <- adata$obs_names
rownames(counts) <- adata$var_names
loc <- as.data.frame(adata$obsm[['spatial']])
row_data = adata$var
row_data$gene_id = rownames(row_data)
row_data$feature_type = "Gene Expression"
colnames(loc) <- c("x", "y")
rownames(loc) <- colnames(counts)
spe <- SpatialExperiment(
assays = list(counts = counts),
rowData = row_data,
colData = loc,
spatialCoordsNames = c("x", "y"))
spe <- computeLibraryFactors(spe)
spe <- logNormCounts(spe)
# filter any new zeros created after filtering low-expressed genes
# remove genes with zero expression
# remove spots with zero expression
ix_zero_spots <- colSums(counts(spe)) == 0
table(ix_zero_spots)
if (sum(ix_zero_spots) > 0) {
spe <- spe[, !ix_zero_spots]
}
dim(spe)
ix_zero_genes <- rowSums(counts(spe)) <= 50
table(ix_zero_genes)
if (sum(ix_zero_genes) > 0) {
spe <- spe[!ix_zero_genes, ]
}
dim(spe)
print('Start')
spe <- nnSVG(spe, n_threads=48)
df <- rowData(spe)
output_file <- file.path(out_dir, paste0("nnsvg_", file_name, ".csv"))
write.csv(df, file=output_file, quote=FALSE)
}
```
:::
:::{dropdown} transfer_h5ad_to_R
```python
import scanpy as sc
import pandas as pd
import os
from STMiner import SPFinder
current_dir = 'I://ST' #put your h5ad file here!!!
min_cell = 500
for root, _, files in os.walk(current_dir):
for file in files:
if file.endswith('h5ad'):
h5ad_file_path = os.path.join(root, file)
# Process the H5AD file here
sp = SPFinder()
sp.read_h5ad(h5ad_file_path, bin_size=1)
sc.pp.filter_genes(sp.adata, min_cells=min_cell)
x_coords = sp.adata.obs['x'].values # x
y_coords = sp.adata.obs['y'].values # y
column_names = [f"{int(x)}x{int(y)}" for x, y in zip(x_coords, y_coords)]
dense_matrix = sp.adata.X.T.toarray()
gene_spot_matrix = pd.DataFrame(dense_matrix, index=sp.adata.var_names, columns=column_names)
filename = file.replace('.h5ad','')
gene_spot_matrix.to_csv(f"G://sparkx/{filename}_{min_cell}.csv")
```
:::
:::{dropdown} SPARK
```R
library(SPARK)
library(Matrix)
dir_path <- "/data/home/st/"
csv_files <- list.files(path = dir_path, pattern = "\\.csv$", full.names = TRUE) # output dir of transfer_h5ad_to_R
out_dir <- "/data/home/result/"
for (file in csv_files) {
count <- read.csv(file, row.names = 1, check.names = FALSE)
file_name <- tools::file_path_sans_ext(basename(file))
print(file_name)
tmp <- as.matrix(count)
info <- cbind.data.frame(
x = as.numeric(sapply(strsplit(colnames(tmp), split = "x"), "[", 1)),
y = as.numeric(sapply(strsplit(colnames(tmp), split = "x"), "[", 2)),
total_counts = apply(tmp, 2, sum)
)
rownames(info) <- colnames(tmp)
location <- as.matrix(info)
counts_sparse <- as.matrix(tmp)
spark <- CreateSPARKObject(counts=count, percentage = 0, min_total_counts = 0, location=info[, 1:2])
spark@lib_size <- apply(spark@counts, 2, sum)
spark <- spark.vc(spark,
covariates = NULL,
lib_size = spark@lib_size,
num_core = 24,
verbose = F)
spark <- spark.test(spark,
check_positive = T,
verbose = F)
df <- as.data.frame(spark@res_mtest)
output_file <- file.path(out_dir, paste0("spark_", file_name, ".csv"))
write.csv(df, file=output_file, quote=FALSE)
df[is.na(df)] <- 1
print('-----------------------------------------')
}
```
:::
:::{dropdown} trendsceek
```R
library(trendsceek)
library(Matrix)
dir_path <- "G://trendsceek/"
csv_files <- list.files(path = dir_path, pattern = "\\.csv$", full.names = TRUE)
out_dir <- "E://benchmark"
for (file in csv_files) {
count <- read.csv(file, row.names = 1, check.names = FALSE)
file_name <- tools::file_path_sans_ext(basename(file))
print(file_name)
counts_filt <- genefilter_exprmat(count,
min.expr = 3,
min.ncells.expr = 50)
vargenes_stats <- calc_varstats(counts_filt,
counts_filt,
quant.cutoff = 0.5,
method = "glm"
)
topvar.genes <- rownames(vargenes_stats[["real.stats"]])[1:2000]
output_file <- file.path(out_dir, paste0("trendsceek_", file_name, ".csv"))
write.csv(topvar.genes, output_file)
}
```
:::
:::{dropdown} SPARKX
```R
library(SPARK)
library(Matrix)
dir_path <- "G://sparkx/"
csv_files <- list.files(path = dir_path, pattern = "\\.csv$", full.names = TRUE)
out_dir <- "E://benchmark"
for (file in csv_files) {
count <- read.csv(file, row.names = 1, check.names = FALSE)
file_name <- tools::file_path_sans_ext(basename(file))
print(file_name)
tmp <- as.matrix(count)
info <- cbind.data.frame(
x = as.numeric(sapply(strsplit(colnames(tmp), split = "x"), "[", 1)),
y = as.numeric(sapply(strsplit(colnames(tmp), split = "x"), "[", 2)),
total_counts = apply(tmp, 2, sum)
)
rownames(info) <- colnames(tmp)
location <- as.matrix(info)
counts_sparse <- as.matrix(tmp)
sparkX <- sparkx(counts_sparse, location, numCores = 1, option = "mixture")
df <- as.data.frame(sparkX$res_mtest)
df <- df[order(df$adjustedPval), ]
output_file <- file.path(out_dir, paste0("sparkx_", file_name, ".csv"))
write.csv(df, output_file)
}
:::
:::{dropdown} SpatialDE/SpatialDE2/Hotspot/STMiner
```python
import os
import NaiveDE
import SpatialDE
import hotspot
import scanpy as sc
import pandas as pd
import numpy as np
from STMiner import SPFinder
def process_h5ad_file(file):
hotspot_svg(file)
spatial_de_svg(file)
stminer_svg(file)
def stminer_svg(file):
name = os.path.basename(file)
print(name)
pre = name.split('.')[0]
sp = SPFinder()
sp.read_h5ad(file, bin_size=1)
sp.get_genes_csr_array(min_cells=50, log1p=True)
sp.spatial_high_variable_genes()
df = sp.global_distance
df.to_csv('E://benchmark/stminer_' + pre + '_50.csv')
def hotspot_svg(file):
adata = sc.read_h5ad(file)
name = os.path.basename(file)
pre = name.split('.')[0]
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.calculate_qc_metrics(adata, inplace=True)
sc.tl.pca(adata, svd_solver='arpack')
sc.pp.log1p(adata)
sc.pp.highly_variable_genes(adata, n_top_genes=2000)
adata.var['Gene'] = list(adata.var.index)
adata.var.query('highly_variable==True').loc[:, ['Gene', 'dispersions_norm']].to_csv(
'E://benchmark/benchmark/seurat_' + pre + '_50.csv')
hs = hotspot.Hotspot(
adata,
model='danb',
latent_obsm_key="X_pca",
umi_counts_obs_key="total_counts"
)
hs.create_knn_graph(weighted_graph=False, n_neighbors=50)
hs_results = hs.compute_autocorrelations()
hs_results.to_csv('E://benchmark/hotspots_' + pre + '_50.csv')
def spatial_de_svg(file):
name = os.path.basename(file)
pre = name.split('.')[0]
adata = sc.read_h5ad(file)
sc.pp.filter_genes(adata, min_cells=50)
sc.pp.calculate_qc_metrics(adata, inplace=True)
adata.obs['x'] = adata.obsm['spatial'][:, 0]
adata.obs['y'] = adata.obsm['spatial'][:, 1]
exp_df = pd.DataFrame(adata.X.todense(), index=adata.obs.index, columns=adata.var.index)
norm_expr = NaiveDE.stabilize(exp_df.T).T
resid_expr = NaiveDE.regress_out(adata.obs, norm_expr.T, 'np.log(total_counts)').T
sample_resid_expr = resid_expr.sample(n=len(adata.var), axis=1, random_state=1)
X = adata.obs[['x', 'y']]
results = SpatialDE.run(np.array(X), sample_resid_expr)
sign_results = results.query('qval < 0.05')
sign_results = sign_results.sort_values(by=['FSV'], ascending=False)
sign_results.to_csv('E://benchmark/spatialde_' + pre + '_50.csv')
def spatial2_de_svg(file):
name = os.path.basename(file)
pre = name.split('.')[0]
adata = sc.read_h5ad(file)
sc.pp.filter_genes(adata, min_cells=500)
sc.pp.calculate_qc_metrics(adata, inplace=True)
adata.obs['x'] = adata.obsm['spatial'][:, 0]
adata.obs['y'] = adata.obsm['spatial'][:, 1]
total_counts = sc.get.obs_df(adata, keys=["total_counts"])
exp_df = pd.DataFrame(adata.X.todense(), index=adata.obs.index, columns=adata.var.index)
norm_expr = NaiveDE.stabilize(exp_df.T).T
adata.X = NaiveDE.regress_out(total_counts, norm_expr.T, 'np.log(total_counts)').T
X = adata.obs[['x', 'y']]
df_res = SpatialDE.fit(adata, normalized=True, control=None)
df_res.set_index("gene", inplace=True)
df_res = df_res.loc[adata.var_names]
df = pd.concat([df_res, adata.var], axis=1)
df.to_csv('E://benchmark/spatialde2_' + pre + '_50.csv')
if __name__ == '__main__':
# current_dir is the h5ad file directory.
current_dir = 'E://benchmark_data'
for root, _, files in os.walk(current_dir):
for file in files:
if file.endswith('h5ad'):
h5ad_file_path = os.path.join(root, file)
# Process the H5AD file here
process_h5ad_file(h5ad_file_path)
```
:::
:::{dropdown} RCTD

We used Seurat v5 (https://satijalab.org/seurat/) for single-cell data quality control and analysis. First, we performed quality control on the single-cell data by using the CreateSeuratObject function with parameters min.cells=50 and min.features=200. After this, we normalized the data using NormalizeData(), applying the "LogNormalize" method with scale.factor=10000. Following normalization, we identified highly variable features with FindVariableFeatures() using the default "vst" method and nfeatures=2000.
The red dots in panel A stand for the highly variable features that have been screened by FindVariableFeatures, with the remaining dots being colored black. The top 10 genes from the results of FindVariableFeatures are annotated with their gene names.
After that, we used the ScaleData function to scale the data and applied RunPCA to reduce the dimensionality of the data. The features selected for dimensionality reduction are the 2000 highly variable features identified above. The top four PCs (Principal Components) of PCA (Principal Component Analysis) result are presented in panel B.
To evaluate the number of specific PCs used for cell type clustering, we used ElbowPlot to help determine the optimal number of PCs (panel C), selecting the top 10 PCs as the basis for clustering. Clustering was performed using the function FindClusters with the ‘resolution’ parameter set to 0.3. We chose a resolution of 0.3 because this was the maximum resolution that produced input usable in the subsequent RCTD analysis. When higher resolutions were used, a greater number of clusters were identified many of which had fewer than 25 cells, the minimum number per cluster that RCTD can support). At a resolution of 0.3, this set of data can be divided into 6 clusters (panel D), distinguished primarily by the genes shown in panel E. Yellow represents high contribution, while purple represents low contribution.
We next used RCTD to load the zebrafish spatial transcriptome data as a query, and ran RCTD with the annotated single-cell transcriptome data as a reference:
```R
create.RCTD(query, reference, max_cores = 4)
RCTD <- run.RCTD(RCTD, doublet_mode = 'doublet')
```
The RCTD results of the number of confident pixels of each cell type are as follows (RCTD result a):

The above results show that the overwhelming majority of the pixels are classified as cell type #4, and that the remaining cell types account for a small proportion. We further visualize the regions where different cell types are spatially distributed in the results, and the results are in (Supplement Note 3 RCTD result b), each subplot represents the spatial distribution of different cell types:
Due to the threshold setting of the RCTD's own algorithm, the cell type #3 was not assigned to any position (RCTD result figure, panel a), so while there were 6 cell types in the single-cell annotated results, the results of the RCTD had only a spatial distribution of 5 cell types.
Since the original output of RCTD (Supplement Note 3 RCTD result a, b) did not allow the user to adjust the size of the axes and spots, for better visualization and comparison with STMiner, we imported the results of RCTD into Python and re-visualized them with seaborn (https://seaborn.pydata.org/). We displayed all the spots (RCTD filtered out the spots with low 'weight 'as defined in RCTD algorithm), and used different colors to represent 'weight'.
:::
## Compare the distance between SVGs and non-SVGs
:::{dropdown} Python code
```python
import os
import numpy as np
import scanpy as sc
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
from scipy import sparse
from STMiner import SPFinder
from STMiner.Algorithm.AlgUtils import get_exp_array
from tqdm import tqdm
from scipy.sparse import csr_matrix
from statannot import add_stat_annotation
from sklearn.metrics.pairwise import (
cosine_similarity,
euclidean_distances,
manhattan_distances,
additive_chi2_kernel,
)
def scale_matrix(matrix):
current_sum = np.sum(matrix)
target_sum = 1000
scaling_factor = target_sum / current_sum
scaled_matrix = matrix * scaling_factor
return scaled_matrix
top = 2000
pair = [
("stMINER", "Hotspot"),
("stMINER", "Seurat"),
("stMINER", "SpatialDE"),
("stMINER", "SpatialDE2"),
("stMINER", "SPARK"),
("stMINER", "SPARKX"),
("stMINER", "SpaGFT"),
("stMINER", "scGCO"),
("stMINER", "nnSVG"),
]
for tag in [
"Control02",
"Control05",
"Control06",
"Control09",
]:
print(tag)
sp = SPFinder()
sp.read_h5ad(f"I://mm10//{tag}.h5ad", bin_size=10)
sc.pp.filter_genes(sp.adata, min_cells=50)
sp.get_genes_csr_array(min_cells=50)
hotspot = list(
pd.read_csv(
f"E://benchmark/hotspots_{tag}_50.csv"
)[:top]["Gene"]
)
seurat = list(
pd.read_csv(
f"E://benchmark/seurat_{tag}_50.csv"
).sort_values(by="dispersions_norm", ascending=False)[:top]["Gene"]
)
stminer = list(
pd.read_csv(
f"E://benchmark/stminer_{tag}_50.csv"
)[:top]["Gene"]
)
spatialde = list(
pd.read_csv(
f"E://benchmark/spatialde_{tag}_50.csv"
).sort_values(by="FSV", ascending=False)[:top]["g"]
)
spatialde2 = list(
pd.read_csv(
f"E://benchmark/spatialde2_{tag}_50.csv"
).sort_values(by="FSV", ascending=False)[:top]["Unnamed: 0"]
)
sparkx = list(
pd.read_csv(
f"E://benchmark/sparkx_{tag}_50.csv"
)[:top]["Unnamed: 0"]
)
spark = list(
pd.read_csv(
f"E://benchmark/spark_{tag}_50.csv"
).sort_values(by="adjusted_pvalue", ascending=True)[:top]["Unnamed: 0"]
)
spagft = list(
pd.read_csv(
f"E://benchmark/spagft_{tag}_50.csv"
)[:top]["Unnamed: 0"]
)
scgco = list(
pd.read_csv(
f"E://benchmark/spagft_{tag}_50.csv"
).sort_values(by="fdr", ascending=True)[:top]["Unnamed: 0"]
)
nndf = pd.read_csv(
f"E://benchmark/nnsvg_{tag}_50.csv"
).sort_values(by="rank", ascending=True)
mask = nndf["Unnamed: 0"].isin(list(sp.adata.var_names))
nnsvg = list(nndf[mask]["Unnamed: 0"][:top])
tot = list(
set(
hotspot
+ seurat
+ stminer
+ spatialde
+ spatialde2
+ sparkx
+ spark
+ spagft
+ scgco
+ nnsvg
)
)
total_dict = {}
for i in tqdm(tot, desc="Getting csr matrix"):
try:
total_dict[i] = csr_matrix(
np.array(
scale_matrix(get_exp_array(sp.adata, i)).flatten().reshape(1, -1)
)
)
except:
pass
global_arr = scale_matrix(sp.plot.get_global_matrix(False, False, False).todense())
X = np.array(global_arr.flatten().reshape(1, -1))
index_list = list(
set(
hotspot
+ seurat
+ stminer
+ spatialde
+ spatialde2
+ sparkx
+ spark
+ spagft
+ scgco
+ nnsvg
)
)
###############################################################################################################
print("hotspot")
hotspot_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in hotspot:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
hotspot_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
hotspot_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
hotspot_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
hotspot_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
hotspot_result_df["Minkowski Distance"][gene] = minkowski_dist
###############################################################################################################
print("stminer")
stminer_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in stminer:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
stminer_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
stminer_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
stminer_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
stminer_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
stminer_result_df["Minkowski Distance"][gene] = minkowski_dist
###############################################################################################################
print("seurat")
seurat_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in seurat:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
seurat_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
seurat_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
seurat_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
seurat_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
seurat_result_df["Minkowski Distance"][gene] = minkowski_dist
###############################################################################################################
print("spatialde")
spatialde_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in spatialde:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
spatialde_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
spatialde_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
spatialde_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
spatialde_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
spatialde_result_df["Minkowski Distance"][gene] = minkowski_dist
##########################################################################
print("spatialde2")
spatialde2_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in spatialde2:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
spatialde2_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
spatialde2_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
spatialde2_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
spatialde2_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
spatialde2_result_df["Minkowski Distance"][gene] = minkowski_dist
########################################################################
print("spark")
spark_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in spark:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
spark_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
spark_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
spark_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
spark_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
spark_result_df["Minkowski Distance"][gene] = minkowski_dist
################################################################
print("sparkx")
sparkx_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in sparkx:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
sparkx_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
sparkx_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
sparkx_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
sparkx_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
sparkx_result_df["Minkowski Distance"][gene] = minkowski_dist
################################################################
print("SpaGFT")
spagft_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in spagft:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
spagft_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
spagft_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
spagft_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
spagft_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
spagft_result_df["Minkowski Distance"][gene] = minkowski_dist
################################################################
print("scGCO")
scgco_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in scgco:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
scgco_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
scgco_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
scgco_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
scgco_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
scgco_result_df["Minkowski Distance"][gene] = minkowski_dist
################################################################
print("nnsvg")
nnsvg_result_df = pd.DataFrame(
index=index_list,
columns=[
"1/Cosine Similarity",
"Euclidean Distances",
"Manhattan Distances",
"-Additive Chi-Square Kernel",
"Minkowski Distance",
],
)
for gene in nnsvg:
Y = total_dict[gene].toarray()
cosine_sim = cosine_similarity(X, Y)[0][0]
nnsvg_result_df["1/Cosine Similarity"][gene] = 1 / cosine_sim
euclidean_dist = euclidean_distances(X, Y)[0][0]
nnsvg_result_df["Euclidean Distances"][gene] = euclidean_dist
manhattan_dist = manhattan_distances(X, Y)[0][0]
nnsvg_result_df["Manhattan Distances"][gene] = manhattan_dist
additive_chi2_ker = additive_chi2_kernel(X, Y)[0][0]
nnsvg_result_df["-Additive Chi-Square Kernel"][gene] = -additive_chi2_ker
p = 3
minkowski_dist = np.sum(np.abs(X - Y) ** p) ** (1 / p)
nnsvg_result_df["Minkowski Distance"][gene] = minkowski_dist
################################################################
hotspot_result_df = hotspot_result_df.dropna()
stminer_result_df = stminer_result_df.dropna()
seurat_result_df = seurat_result_df.dropna()
spatialde_result_df = spatialde_result_df.dropna()
spatialde2_result_df = spatialde2_result_df.dropna()
spark_result_df = spark_result_df.dropna()
sparkx_result_df = sparkx_result_df.dropna()
scgco_result_df = scgco_result_df.dropna()
spagft_result_df = spagft_result_df.dropna()
nnsvg_result_df = nnsvg_result_df.dropna()
######################################################################
fig, axes = plt.subplots(1, 5, figsize=(12, 6))
colors = [
"#a07f64",
"#8d99ae",
"#21b6e4",
"#fae5dc",
"#eeca70",
"#a2c986",
"#99d4c6",
"#db5647",
"#c9e1ef",
"#d0b1cf",
]
sns.boxplot(
[
list(stminer_result_df["Euclidean Distances"]),
list(hotspot_result_df["Euclidean Distances"]),
list(seurat_result_df["Euclidean Distances"]),
list(spatialde_result_df["Euclidean Distances"]),
list(spatialde2_result_df["Euclidean Distances"]),
list(spark_result_df["Euclidean Distances"]),
list(sparkx_result_df["Euclidean Distances"]),
list(spagft_result_df["Euclidean Distances"]),
list(scgco_result_df["Euclidean Distances"]),
list(nnsvg_result_df["Euclidean Distances"]),
],
showfliers=False,
ax=axes[0],
width=0.55,
palette=colors,
linewidth=0.8,
)
data_eu = pd.DataFrame(
{
"Method": ["stMINER"] * len(stminer_result_df)
+ ["Hotspot"] * len(hotspot_result_df)
+ ["Seurat"] * len(seurat_result_df)
+ ["SpatialDE"] * len(spatialde_result_df)
+ ["SpatialDE2"] * len(spatialde2_result_df)
+ ["SPARK"] * len(spark_result_df)
+ ["SPARKX"] * len(sparkx_result_df)
+ ["SpaGFT"] * len(spagft_result_df)
+ ["scGCO"] * len(scgco_result_df)
+ ["nnSVG"] * len(nnsvg_result_df),
"Euclidean Distances": list(stminer_result_df["Euclidean Distances"])
+ list(hotspot_result_df["Euclidean Distances"])
+ list(seurat_result_df["Euclidean Distances"])
+ list(spatialde_result_df["Euclidean Distances"])
+ list(spatialde2_result_df["Euclidean Distances"])
+ list(spark_result_df["Euclidean Distances"])
+ list(sparkx_result_df["Euclidean Distances"])
+ list(spagft_result_df["Euclidean Distances"])
+ list(scgco_result_df["Euclidean Distances"])
+ list(nnsvg_result_df["Euclidean Distances"]),
}
)
add_stat_annotation(
axes[0],
data=data_eu,
x="Method",
y="Euclidean Distances",
box_pairs=pair,
test="Mann-Whitney",
text_format="star",
loc="outside",
verbose=0,
fontsize="small",
)
axes[0].set_ylabel("Euclidean Distances", fontname="Arial", fontsize=10)
#######################################################################################################
sns.boxplot(
[
list(stminer_result_df["Euclidean Distances"]),
list(hotspot_result_df["Euclidean Distances"]),
list(seurat_result_df["Euclidean Distances"]),
list(spatialde_result_df["Euclidean Distances"]),
list(spatialde2_result_df["Euclidean Distances"]),
list(spark_result_df["Euclidean Distances"]),
list(sparkx_result_df["Euclidean Distances"]),
list(spagft_result_df["Euclidean Distances"]),
list(scgco_result_df["Euclidean Distances"]),
list(nnsvg_result_df["Euclidean Distances"]),
],
showfliers=False,
ax=axes[1],
width=0.55,
palette=colors,
linewidth=0.8,
)
data = pd.DataFrame(
{
"Method": ["stMINER"] * len(stminer_result_df)
+ ["Hotspot"] * len(hotspot_result_df)
+ ["Seurat"] * len(seurat_result_df)
+ ["SpatialDE"] * len(spatialde_result_df)
+ ["SpatialDE2"] * len(spatialde2_result_df)
+ ["SPARK"] * len(spark_result_df)
+ ["SPARKX"] * len(sparkx_result_df)
+ ["SpaGFT"] * len(spagft_result_df)
+ ["scGCO"] * len(scgco_result_df)
+ ["nnSVG"] * len(nnsvg_result_df),
"1/Cosine Similarity": list(stminer_result_df["1/Cosine Similarity"])
+ list(hotspot_result_df["1/Cosine Similarity"])
+ list(seurat_result_df["1/Cosine Similarity"])
+ list(spatialde_result_df["1/Cosine Similarity"])
+ list(spatialde2_result_df["1/Cosine Similarity"])
+ list(spark_result_df["1/Cosine Similarity"])
+ list(sparkx_result_df["1/Cosine Similarity"])
+ list(spagft_result_df["1/Cosine Similarity"])
+ list(scgco_result_df["1/Cosine Similarity"])
+ list(nnsvg_result_df["1/Cosine Similarity"]),
}
)
add_stat_annotation(
axes[1],
data=data,
x="Method",
y="1/Cosine Similarity",
box_pairs=pair,
test="Mann-Whitney",
text_format="star",
loc="outside",
verbose=0,
fontsize="small",
)
axes[1].set_ylabel("1/Cosine Similarity", fontname="Arial", fontsize=10)
#######################################################################################################
sns.boxplot(
[
list(stminer_result_df["Manhattan Distances"]),
list(hotspot_result_df["Manhattan Distances"]),
list(seurat_result_df["Manhattan Distances"]),
list(spatialde_result_df["Manhattan Distances"]),
list(spatialde2_result_df["Manhattan Distances"]),
list(spark_result_df["Manhattan Distances"]),
list(sparkx_result_df["Manhattan Distances"]),
list(spagft_result_df["Manhattan Distances"]),
list(scgco_result_df["Manhattan Distances"]),
list(nnsvg_result_df["Manhattan Distances"]),
],
showfliers=False,
ax=axes[2],
width=0.55,
palette=colors,
linewidth=0.8,
)
data_Manhattan = pd.DataFrame(
{
"Method": ["stMINER"] * len(stminer_result_df)
+ ["Hotspot"] * len(hotspot_result_df)
+ ["Seurat"] * len(seurat_result_df)
+ ["SpatialDE"] * len(spatialde_result_df)
+ ["SpatialDE2"] * len(spatialde2_result_df)
+ ["SPARK"] * len(spark_result_df)
+ ["SPARKX"] * len(sparkx_result_df)
+ ["SpaGFT"] * len(spagft_result_df)
+ ["scGCO"] * len(scgco_result_df)
+ ["nnSVG"] * len(nnsvg_result_df),
"Manhattan Distances": list(stminer_result_df["Manhattan Distances"])
+ list(hotspot_result_df["Manhattan Distances"])
+ list(seurat_result_df["Manhattan Distances"])
+ list(spatialde_result_df["Manhattan Distances"])
+ list(spatialde2_result_df["Manhattan Distances"])
+ list(spark_result_df["Manhattan Distances"])
+ list(sparkx_result_df["Manhattan Distances"])
+ list(spagft_result_df["Manhattan Distances"])
+ list(scgco_result_df["Manhattan Distances"])
+ list(nnsvg_result_df["Manhattan Distances"]),
}
)
add_stat_annotation(
axes[2],
data=data_Manhattan,
x="Method",
y="Manhattan Distances",
box_pairs=pair,
test="Mann-Whitney",
text_format="star",
loc="outside",
verbose=0,
fontsize="small",
)
axes[2].set_ylabel("Manhattan Distances", fontname="Arial", fontsize=10)
######################################################################################################################
plt.grid(False)
sns.boxplot(
[
list(stminer_result_df["-Additive Chi-Square Kernel"]),
list(hotspot_result_df["-Additive Chi-Square Kernel"]),
list(seurat_result_df["-Additive Chi-Square Kernel"]),
list(spatialde_result_df["-Additive Chi-Square Kernel"]),
list(spatialde2_result_df["-Additive Chi-Square Kernel"]),
list(spark_result_df["-Additive Chi-Square Kernel"]),
list(sparkx_result_df["-Additive Chi-Square Kernel"]),
list(spagft_result_df["-Additive Chi-Square Kernel"]),
list(scgco_result_df["-Additive Chi-Square Kernel"]),
list(nnsvg_result_df["-Additive Chi-Square Kernel"]),
],
showfliers=False,
ax=axes[3],
width=0.55,
palette=colors,
linewidth=0.8,
)
data_Manhattan = pd.DataFrame(
{
"Method": ["stMINER"] * len(stminer_result_df)
+ ["Hotspot"] * len(hotspot_result_df)
+ ["Seurat"] * len(seurat_result_df)
+ ["SpatialDE"] * len(spatialde_result_df)
+ ["SpatialDE2"] * len(spatialde2_result_df)
+ ["SPARK"] * len(spark_result_df)
+ ["SPARKX"] * len(sparkx_result_df)
+ ["SpaGFT"] * len(spagft_result_df)
+ ["scGCO"] * len(scgco_result_df)
+ ["nnSVG"] * len(nnsvg_result_df),
"-Additive Chi-Square Kernel": list(
stminer_result_df["-Additive Chi-Square Kernel"]
)
+ list(hotspot_result_df["-Additive Chi-Square Kernel"])
+ list(seurat_result_df["-Additive Chi-Square Kernel"])
+ list(spatialde_result_df["-Additive Chi-Square Kernel"])
+ list(spatialde2_result_df["-Additive Chi-Square Kernel"])
+ list(spark_result_df["-Additive Chi-Square Kernel"])
+ list(sparkx_result_df["-Additive Chi-Square Kernel"])
+ list(spagft_result_df["-Additive Chi-Square Kernel"])
+ list(scgco_result_df["-Additive Chi-Square Kernel"])
+ list(nnsvg_result_df["-Additive Chi-Square Kernel"]),
}
)
add_stat_annotation(
axes[3],
data=data_Manhattan,
x="Method",
y="-Additive Chi-Square Kernel",
box_pairs=pair,
test="Mann-Whitney",
text_format="star",
loc="outside",
verbose=0,
fontsize="small",
)
axes[3].set_ylabel("-Additive Chi-Square Kernel", fontname="Arial", fontsize=10)
##################################################################################################
sns.boxplot(
[
list(stminer_result_df["Minkowski Distance"]),
list(hotspot_result_df["Minkowski Distance"]),
list(seurat_result_df["Minkowski Distance"]),
list(spatialde_result_df["Minkowski Distance"]),
list(spatialde2_result_df["Minkowski Distance"]),
list(spark_result_df["Minkowski Distance"]),
list(sparkx_result_df["Minkowski Distance"]),
list(spagft_result_df["Minkowski Distance"]),
list(scgco_result_df["Minkowski Distance"]),
list(nnsvg_result_df["Minkowski Distance"]),
],
showfliers=False,
ax=axes[4],
width=0.55,
palette=colors,
linewidth=0.8,
)
data_Manhattan = pd.DataFrame(
{
"Method": ["stMINER"] * len(stminer_result_df)
+ ["Hotspot"] * len(hotspot_result_df)
+ ["Seurat"] * len(seurat_result_df)
+ ["SpatialDE"] * len(spatialde_result_df)
+ ["SpatialDE2"] * len(spatialde2_result_df)
+ ["SPARK"] * len(spark_result_df)
+ ["SPARKX"] * len(sparkx_result_df)
+ ["SpaGFT"] * len(spagft_result_df)
+ ["scGCO"] * len(scgco_result_df)
+ ["nnSVG"] * len(nnsvg_result_df),
"Minkowski Distance": list(stminer_result_df["Minkowski Distance"])
+ list(hotspot_result_df["Minkowski Distance"])
+ list(seurat_result_df["Minkowski Distance"])
+ list(spatialde_result_df["Minkowski Distance"])
+ list(spatialde2_result_df["Minkowski Distance"])
+ list(spark_result_df["Minkowski Distance"])
+ list(sparkx_result_df["Minkowski Distance"])
+ list(spagft_result_df["Minkowski Distance"])
+ list(scgco_result_df["Minkowski Distance"])
+ list(nnsvg_result_df["Minkowski Distance"]),
}
)
add_stat_annotation(
axes[4],
data=data_Manhattan,
x="Method",
y="Minkowski Distance",
box_pairs=pair,
test="Mann-Whitney",
text_format="star",
loc="outside",
verbose=0,
fontsize="small",
)
axes[4].set_ylabel("Minkowski Distance", fontname="Arial", fontsize=10)
sns.set_style("white")
for ax in axes.flatten():
ax.spines["top"].set_visible(False)
ax.spines["right"].set_visible(False)
ax.spines["left"].set_linewidth(1)
ax.spines["bottom"].set_linewidth(1)
ax.xaxis.set_ticks_position("bottom")
ax.yaxis.set_ticks_position("left")
ax.set_xticklabels(
[
"STMiner",
"Hotspot",
"Seurat",
"SpatialDE",
"SpatialDE2",
"SPARK",
"SPARKX",
"SpaGFT",
"scGCO",
"nnSVG",
],
fontname="Arial",
fontsize=7,
rotation=60,
)
ax.set_xlabel("Method", fontname="Arial", fontsize=10)
plt.subplots_adjust(wspace=0.35)
fig.text(-0.005, 0.35, tag, va="center", rotation="vertical", fontsize=10)
plt.tight_layout()
plt.savefig(f"./revision/{tag}.eps", format="eps", bbox_inches="tight")
plt.show()
```
:::