{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# spDDB's Bi-variate Spatial and Non-spatial evaluation metrics - DLPFC 151508" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FZMdJrze1DrZ", "outputId": "608b1db9-5440-4389-a4d5-6cafb2fdb5de" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "fatal: destination path 'spDDB' already exists and is not an empty directory.\n", "/content/spDDB/Experiments/_Deconvolution_Metrics_Calculation\n" ] } ], "source": [ "!git clone https://github.com/Zafar-Lab/spDDB.git\n", "%cd spDDB/Experiments/_Deconvolution_Metrics_Calculation/" ] }, { "cell_type": "markdown", "metadata": { "id": "9DOTptaLXs5Q" }, "source": [ "Mounting google drive to accessing input data" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "sszU7MS-XsCl", "outputId": "3551b732-a9f2-489a-c317-32c46491bbf5" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" ] } ], "source": [ "from google.colab import drive\n", "drive.mount('/content/drive')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "j_hk64PqYaze", "outputId": "2c012a06-5ca8-4fb3-ffff-167efc1bcdb7" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: scanpy in /usr/local/lib/python3.12/dist-packages (1.12.1)\n", "Requirement already satisfied: anndata>=0.10.8 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.12.16)\n", "Requirement already satisfied: certifi in /usr/local/lib/python3.12/dist-packages (from scanpy) (2026.4.22)\n", "Requirement already satisfied: fast-array-utils>=1.4 in /usr/local/lib/python3.12/dist-packages (from fast-array-utils[accel,sparse]>=1.4->scanpy) (1.4.1)\n", "Requirement already satisfied: h5py>=3.11 in /usr/local/lib/python3.12/dist-packages (from scanpy) (3.16.0)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.12/dist-packages (from scanpy) (1.5.3)\n", "Requirement already satisfied: legacy-api-wrap>=1.5 in /usr/local/lib/python3.12/dist-packages (from scanpy) (1.5)\n", "Requirement already satisfied: matplotlib>=3.9 in /usr/local/lib/python3.12/dist-packages (from scanpy) (3.10.0)\n", "Requirement already satisfied: natsort in /usr/local/lib/python3.12/dist-packages (from scanpy) (8.4.0)\n", "Requirement already satisfied: networkx>=2.8.8 in /usr/local/lib/python3.12/dist-packages (from scanpy) (3.6.1)\n", "Requirement already satisfied: numba>=0.60 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.60.0)\n", "Requirement already satisfied: numpy>=2 in /usr/local/lib/python3.12/dist-packages (from scanpy) (2.0.2)\n", "Requirement already satisfied: packaging>=25 in /usr/local/lib/python3.12/dist-packages (from scanpy) (26.1)\n", "Requirement already satisfied: pandas>=2.3 in /usr/local/lib/python3.12/dist-packages (from scanpy) (2.3.3)\n", "Requirement already satisfied: patsy in /usr/local/lib/python3.12/dist-packages (from scanpy) (1.0.2)\n", "Requirement already satisfied: pynndescent>=0.5.13 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.6.0)\n", "Requirement already satisfied: scikit-learn>=1.6 in /usr/local/lib/python3.12/dist-packages (from scanpy) (1.6.1)\n", "Requirement already satisfied: scipy>=1.13 in /usr/local/lib/python3.12/dist-packages (from scanpy) (1.16.3)\n", "Requirement already satisfied: seaborn>=0.13.2 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.13.2)\n", "Requirement already satisfied: session-info2 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.4.1)\n", "Requirement already satisfied: statsmodels>=0.14.5 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.14.6)\n", "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from scanpy) (4.67.3)\n", "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.12/dist-packages (from scanpy) (4.15.0)\n", "Requirement already satisfied: umap-learn>=0.5.12 in /usr/local/lib/python3.12/dist-packages (from scanpy) (0.5.12)\n", "Requirement already satisfied: array-api-compat>=1.7.1 in /usr/local/lib/python3.12/dist-packages (from anndata>=0.10.8->scanpy) (1.14.0)\n", "Requirement already satisfied: scverse-misc>=0.0.3 in /usr/local/lib/python3.12/dist-packages (from anndata>=0.10.8->scanpy) (0.0.7)\n", "Requirement already satisfied: zarr!=3.0.*,>=2.18.7 in /usr/local/lib/python3.12/dist-packages (from anndata>=0.10.8->scanpy) (3.2.1)\n", "Requirement already satisfied: contourpy>=1.0.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (1.3.3)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (0.12.1)\n", "Requirement already satisfied: fonttools>=4.22.0 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (4.62.1)\n", "Requirement already satisfied: kiwisolver>=1.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (1.5.0)\n", "Requirement already satisfied: pillow>=8 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (11.3.0)\n", "Requirement already satisfied: pyparsing>=2.3.1 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (3.3.2)\n", "Requirement already satisfied: python-dateutil>=2.7 in /usr/local/lib/python3.12/dist-packages (from matplotlib>=3.9->scanpy) (2.9.0.post0)\n", "Requirement already satisfied: llvmlite<0.44,>=0.43.0dev0 in /usr/local/lib/python3.12/dist-packages (from numba>=0.60->scanpy) (0.43.0)\n", "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.3->scanpy) (2025.2)\n", "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.12/dist-packages (from pandas>=2.3->scanpy) (2026.1)\n", "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.12/dist-packages (from scikit-learn>=1.6->scanpy) (3.6.0)\n", "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil>=2.7->matplotlib>=3.9->scanpy) (1.17.0)\n", "Requirement already satisfied: donfig>=0.8 in /usr/local/lib/python3.12/dist-packages (from zarr!=3.0.*,>=2.18.7->anndata>=0.10.8->scanpy) (0.8.1.post1)\n", "Requirement already satisfied: google-crc32c>=1.5 in /usr/local/lib/python3.12/dist-packages (from zarr!=3.0.*,>=2.18.7->anndata>=0.10.8->scanpy) (1.8.0)\n", "Requirement already satisfied: numcodecs>=0.14 in /usr/local/lib/python3.12/dist-packages (from zarr!=3.0.*,>=2.18.7->anndata>=0.10.8->scanpy) (0.16.5)\n", "Requirement already satisfied: pyyaml in /usr/local/lib/python3.12/dist-packages (from donfig>=0.8->zarr!=3.0.*,>=2.18.7->anndata>=0.10.8->scanpy) (6.0.3)\n" ] } ], "source": [ "!pip install scanpy" ] }, { "cell_type": "markdown", "metadata": { "id": "6NP95ImyYeS-" }, "source": [ "Importing Libraries" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "NNA8JFAzXsJW" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import scanpy as sc\n", "import scipy\n", "from scipy.spatial.distance import pdist, squareform\n", "import matplotlib.pyplot as plt\n", "from pathlib import Path\n", "from scipy.stats import entropy\n", "from scipy.spatial.distance import jensenshannon\n", "import seaborn as sns\n", "import os\n", "import pickle\n", "from matplotlib import rcParams\n", "from scipy.spatial import distance\n", "\n", "from metrics import *\n", "from create_update_metrics import *" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 70 }, "id": "ggR2qJlBYr9q", "outputId": "2baf4be9-054e-4222-ec00-2745f8c2e369" }, "outputs": [ { "data": { "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" }, "text/plain": [ "'\\nCreate_new_evaluation: Function to compute all the metrics for all the methods for a dataset.\\nUpdate_method_evaluation: Function to update the metric values for a particular method for a dataset.\\nUpdate_metric_evaluation: Function to update the metric value for all the methods\\nNote: l and col are dataset specfic, l = 1.2 default for Visium datasets.\\n'" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"\"\"\n", "Create_new_evaluation: Function to compute all the metrics for all the methods for a dataset.\n", "Update_method_evaluation: Function to update the metric values for a particular method for a dataset.\n", "Update_metric_evaluation: Function to update the metric value for all the methods\n", "Note: l and col are dataset specfic, l = 1.2 default for Visium datasets.\n", "\"\"\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "6wjHIGHTaIwO" }, "outputs": [], "source": [ "def evaluate_method(gt, pred, coords, method, di, dataset_name, l, eps, co, metrics):\n", "\n", " ### RMSE Computation; joint excel sheet not working\n", " \"\"\"\n", " rmse_path = \"/data/Ajita/Spatial/Datasets/Spatial_Deconvolution/_Evaluation/Metrics_Calculation/rmse.csv\"\n", " jsd_path = \"/data/Ajita/Spatial/Datasets/Spatial_Deconvolution/_Evaluation/Metrics_Calculation/jsd.csv\"\n", "\n", " rmse = pd.read_csv(rmse_path, index_col=0)\n", " jsd = pd.read_csv(jsd_path, index_col=0)\n", " \"\"\"\n", " gt, pred, coords = preprocess_predictions(gt, pred, method, coords)\n", "\n", " di[\"RMSE\"].loc[1, method] = get_rmse(pred, gt)\n", " di[\"JSD\"].loc[1, method] = get_jsd(pred, gt)\n", " \"\"\"\n", " rmse.loc[dataset_name, method] = di[\"RMSE\"].loc[1, method]\n", " jsd.loc[dataset_name, method] = di[\"JSD\"].loc[1, method]\n", " rmse.to_csv(rmse_path)\n", " jsd.to_csv(jsd_path)\n", " \"\"\"\n", " # Other metrics computation\n", "\n", " # Check to see if only RMSE and JS needs to be updated.By default, RMSE and JS will always be updated.\n", " if (len(metrics) <= 2) and ((\"RMSE\" in metrics) or (\"JSD\" in metrics)):\n", " print (\"only RMSE and JS updated\")\n", " return di\n", "\n", " for col in np.array(gt.columns):\n", "\n", " for m in metrics:\n", "\n", " if (m == \"pearson\"):\n", " val = get_pearson(gt[col], pred[col], eps = eps)\n", " di[\"pearson\"].loc[col, method] = val\n", " #if (method == \"STRIDE\"):\n", " # print (\"pearson\", val)\n", "\n", " elif (m == \"cosine_sim\"):\n", " val = get_cosine_sim(gt[col], pred[col], eps)\n", " di[\"cosine_sim\"].loc[col, method] = val\n", " #print (\"cosine similarity\", val)\n", "\n", " elif (m == \"morans_r\"):\n", " val = get_morans_R(gt[col], pred[col], coords, l=l, co=co, eps = eps)\n", " di[\"morans_r\"].loc[col, method] = val\n", " #if (method == \"STRIDE\"):\n", " #print (\"Moran's R\", val)\n", " # di['spearman'].loc[col, method] = get_spearman(gt[col],pred[col],coords,l = l, co = co)\n", "\n", " elif(m == \"spatial_pearson\"):\n", " val = get_spatial_pearson(gt[col], pred[col], coords, l=l, co=co, eps = eps)\n", " di[\"spatial_pearson\"].loc[col, method] = val\n", " #print (\"spatial pearson\", val)\n", "\n", " elif (m == \"ssim\"):\n", " val = compute_ssim(gt[col], pred[col], eps = eps)\n", " di[\"ssim\"].loc[col, method] = val\n", " #print (\"ssim\", val)\n", "\n", " elif (m == \"lee_stat\"):\n", " val = compute_Lee_stats(gt[col], pred[col], coords, l=l, co=co, eps = eps)\n", " di[\"lee_stat\"].loc[col, method] = val\n", " #print (\"lee stats\", val)\n", "\n", " elif (m == \"geary_c\"):\n", " val = compute_geary(gt[col], pred[col], coords, l=l, co=co, eps = eps)\n", " di[\"geary_c\"].loc[col, method] = val\n", " #print (\"geary_c\", val)\n", "\n", " elif (m == \"AUPR\"):\n", " compute_AUPR(gt[col], pred[col], coords, l=l, co=co, eps = eps)\n", "\n", " print (method + \"done\")\n", " return di\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "1QiwbjxpZvj1" }, "outputs": [], "source": [ "def create_new_evaluation(path_to_outputs, ground_truth_path, rmse_path, jsd_path, dataset_name, col, methods_all, celltype_metrics,\n", " global_metrics, l = 1.2, eps = 1e-8, co = 0):\n", "\n", " gt, coords, adata = preprocess_groundtruth(ground_truth_path, col, dataset_name)\n", "\n", " di = {}\n", " for metric in (celltype_metrics):\n", " di[metric] = pd.DataFrame(columns = methods_all, index = adata.var_names)\n", " for metric in (global_metrics):\n", " di[metric] = pd.DataFrame(columns = methods_all, index = [1])\n", "\n", " for method in methods_all:\n", " print (method)\n", " try:\n", " if method == \"STRIDE\":\n", " pred = pd.read_table(\n", " path_to_outputs + \"output_\" + method + \".csv\", index_col=0, sep=\"\\t\"\n", " )\n", " elif method == \"Polaris\":\n", " pred = pd.read_table(\n", " path_to_outputs + \"output_\" + method + \".tsv\", index_col=0\n", " )\n", " else:\n", " pred = pd.read_csv(\n", " path_to_outputs + \"output_\" + method + \".csv\", index_col=0\n", " )\n", " except:\n", " print(method + \" output not found\")\n", " continue\n", " # If new method is getting evaluated, all metrics should be updated.\n", " di = evaluate_method(gt,pred,coords,method,di, dataset_name, l, eps, co, celltype_metrics + global_metrics)\n", "\n", " pickle.dump(di,open(path_to_outputs + \"Metrics/eval.pkl\",\"wb\"))\n", "\n", " for metric in (celltype_metrics + global_metrics):\n", " di[metric].to_csv(path_to_outputs + \"Metrics/\" + metric + '.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zv5MmCoXYucB", "outputId": "3f69354b-8d18-42b8-d761-6716c15945af" }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Autogenes\n", "Num cells dropped:0\n", "preprocessing finished (4381, 17) and (4381, 17)\n", "Autogenesdone\n" ] } ], "source": [ "eps = 0.00000001\n", "co = 0\n", "\n", "# Sample run on SONAR output\n", "l=1.2\n", "dataset_name = \"DLPFC151508\"\n", "col = \"Average_SynthST_ReX_Norm\"\n", "celltype_metrics = ['cosine_sim','morans_r','spatial_pearson','pearson','lee_stat','ssim','geary_c']\n", "\n", "global_metrics = ['JSD','RMSE']\n", "\n", "data_path = \"/content/drive/MyDrive/Major_project/Benchmarking_Shared/spDDB_tutorials/4_data/\"\n", "root_path = data_path + \"151508/\"\n", "#gt_path = data_path + \"Simulated_cell_type_proportion_DLPFC_151508.csv\"\n", "gt_path = \"/content/drive/MyDrive/Major_project/Benchmarking_Shared/spDDB_tutorials/1_data/output_CTP/simulated_st.h5ad\"\n", "rmse_path = root_path + \"RMSE.csv\"\n", "jsd_path = root_path + \"JSD.csv\"\n", "\n", "create_new_evaluation(root_path, gt_path, rmse_path, jsd_path, dataset_name, col, [\"Autogenes\"], celltype_metrics,\n", " global_metrics, l = l, eps = eps, co = co)\n" ] } ], "metadata": { "colab": { "provenance": [] }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }