From 26fac7306e61cf04a1fb713c891b7befe0106360 Mon Sep 17 00:00:00 2001
From: mducducd <minhducddd@gmail.com>
Date: Thu, 23 Apr 2026 10:08:08 +0100
Subject: [PATCH 1/4] fix: dropna inconsistency

---
 .codex                              |  0
 src/stamp/__main__.py               |  3 +++
 src/stamp/config.yaml               |  6 +++++
 src/stamp/modeling/config.py        | 13 +++++++++++
 src/stamp/modeling/crossval.py      |  4 +++-
 src/stamp/modeling/deploy.py        |  3 ++-
 src/stamp/modeling/train.py         |  4 +++-
 src/stamp/statistics/__init__.py    | 34 +++++++++++++++++++---------
 src/stamp/statistics/categorical.py | 35 +++++++++++++++++++++++------
 9 files changed, 81 insertions(+), 21 deletions(-)
 create mode 100644 .codex

diff --git a/.codex b/.codex
new file mode 100644
index 00000000..e69de29b
diff --git a/src/stamp/__main__.py b/src/stamp/__main__.py
index 0b252d6f..2e1e1a8c 100755
--- a/src/stamp/__main__.py
+++ b/src/stamp/__main__.py
@@ -183,6 +183,9 @@ def _run_cli(args: argparse.Namespace) -> None:
                 feature_dir=config.deployment.feature_dir,
                 patient_label=config.deployment.patient_label,
                 filename_label=config.deployment.filename_label,
+                drop_patients_with_missing_ground_truth=(
+                    config.deployment.drop_patients_with_missing_ground_truth
+                ),
                 num_workers=config.deployment.num_workers,
                 accelerator=config.deployment.accelerator,
                 ground_truth_label=config.deployment.ground_truth_label,
diff --git a/src/stamp/config.yaml b/src/stamp/config.yaml
index 7ebdf9d6..3e50b986 100644
--- a/src/stamp/config.yaml
+++ b/src/stamp/config.yaml
@@ -79,6 +79,8 @@ crossval:
   # For multi-target classification you may specify a list of columns,
   # e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]
 
+  # drop_patients_with_missing_ground_truth: true
+
   # For survival (should be status and follow-up days columns in clini table)
   # status_label: "event"
   # time_label: "time"
@@ -135,6 +137,8 @@ training:
   # For multi-target classification you may specify a list of columns,
   # e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]
 
+  # drop_patients_with_missing_ground_truth: true
+
   # For survival (should be status and follow-up days columns in clini table)
   # status_label: "event"
   # time_label: "time"
@@ -179,6 +183,8 @@ deployment:
   # For multi-target classification you may specify a list of columns,
   # e.g. ground_truth_label: ["KRAS", "BRAF", "NRAS"]
 
+  # drop_patients_with_missing_ground_truth: true
+
   # For survival (should be status and follow-up days columns in clini table)
   # status_label: "event"
   # time_label: "time"
diff --git a/src/stamp/modeling/config.py b/src/stamp/modeling/config.py
index 5b9a6bcc..e10f1899 100644
--- a/src/stamp/modeling/config.py
+++ b/src/stamp/modeling/config.py
@@ -8,6 +8,11 @@
 from stamp.modeling.registry import ModelName
 from stamp.types import Category, PandasLabel, Task
 
+_DROP_PATIENTS_WITH_MISSING_GROUND_TRUTH_DESCRIPTION = (
+    "If true, only patients present in the clinical table are included. "
+    "Set to false to keep patients without ground truth when the task supports it."
+)
+
 
 class TrainConfig(BaseModel):
     model_config = ConfigDict(extra="forbid")
@@ -36,6 +41,10 @@ class TrainConfig(BaseModel):
         default=None,
         description="Column in the clinical table indicating follow-up or survival time (e.g. days).",
     )
+    drop_patients_with_missing_ground_truth: bool = Field(
+        default=True,
+        description=_DROP_PATIENTS_WITH_MISSING_GROUND_TRUTH_DESCRIPTION,
+    )
 
     patient_label: PandasLabel = "PATIENT"
     filename_label: PandasLabel = "FILENAME"
@@ -71,6 +80,10 @@ class DeploymentConfig(BaseModel):
     # For survival prediction
     status_label: PandasLabel | None = None
     time_label: PandasLabel | None = None
+    drop_patients_with_missing_ground_truth: bool = Field(
+        default=True,
+        description=_DROP_PATIENTS_WITH_MISSING_GROUND_TRUTH_DESCRIPTION,
+    )
 
     num_workers: int = min(os.cpu_count() or 1, 16)
     accelerator: str = "gpu" if torch.cuda.is_available() else "cpu"
diff --git a/src/stamp/modeling/crossval.py b/src/stamp/modeling/crossval.py
index 4ee71563..36770fa4 100644
--- a/src/stamp/modeling/crossval.py
+++ b/src/stamp/modeling/crossval.py
@@ -64,7 +64,9 @@ def categorical_crossval_(
         status_label=config.status_label,
         patient_label=config.patient_label,
         filename_label=config.filename_label,
-        drop_patients_with_missing_ground_truth=True,
+        drop_patients_with_missing_ground_truth=(
+            config.drop_patients_with_missing_ground_truth
+        ),
     )
     _logger.info(f"Detected feature type: {feature_type}")
 
diff --git a/src/stamp/modeling/deploy.py b/src/stamp/modeling/deploy.py
index 6d81a8fc..8f7dcfe1 100644
--- a/src/stamp/modeling/deploy.py
+++ b/src/stamp/modeling/deploy.py
@@ -72,6 +72,7 @@ def deploy_categorical_model_(
     filename_label: PandasLabel,
     num_workers: int,
     accelerator: str | Accelerator,
+    drop_patients_with_missing_ground_truth: bool = True,
 ) -> None:
     """Deploy categorical model(s) and save predictions.
 
@@ -230,7 +231,7 @@ def deploy_categorical_model_(
                 patient_to_ground_truth,
             ),
             slide_to_patient=slide_to_patient,
-            drop_patients_with_missing_ground_truth=False,
+            drop_patients_with_missing_ground_truth=drop_patients_with_missing_ground_truth,
         )
 
         patient_ids = list(patient_to_data.keys())
diff --git a/src/stamp/modeling/train.py b/src/stamp/modeling/train.py
index 61944624..81287908 100644
--- a/src/stamp/modeling/train.py
+++ b/src/stamp/modeling/train.py
@@ -63,7 +63,9 @@ def train_categorical_model_(
         status_label=config.status_label,
         patient_label=config.patient_label,
         filename_label=config.filename_label,
-        drop_patients_with_missing_ground_truth=True,
+        drop_patients_with_missing_ground_truth=(
+            config.drop_patients_with_missing_ground_truth
+        ),
     )
     _logger.info(f"Detected feature type: {feature_type}")
 
diff --git a/src/stamp/statistics/__init__.py b/src/stamp/statistics/__init__.py
index b3243ecc..ceae8a6a 100644
--- a/src/stamp/statistics/__init__.py
+++ b/src/stamp/statistics/__init__.py
@@ -16,6 +16,7 @@
 from pydantic import BaseModel, ConfigDict, Field
 
 from stamp.statistics.categorical import (
+    _drop_missing_ground_truth_rows,
     categorical_aggregated_,
     categorical_aggregated_multitarget_,
 )
@@ -94,7 +95,7 @@ def _compute_multitarget_classification_stats(
         for p in pred_csvs:
             df = _read_table(p, dtype=str)
             # Only keep rows where this target has ground truth
-            df_clean = df.dropna(subset=[target_label])
+            df_clean = _drop_missing_ground_truth_rows(df, target_label)
             if len(df_clean) > 0:
                 preds_dfs.append(df_clean)
 
@@ -229,19 +230,30 @@ def compute_stats_(
                     )
 
                 preds_dfs = [
-                    _read_table(
-                        p,
-                        usecols=[
+                    df
+                    for p in pred_csvs
+                    if len(
+                        df := _drop_missing_ground_truth_rows(
+                            _read_table(
+                                p,
+                                usecols=[
+                                    ground_truth_label,
+                                    f"{ground_truth_label}_{true_class}",
+                                ],
+                                dtype={
+                                    ground_truth_label: str,
+                                    f"{ground_truth_label}_{true_class}": float,
+                                },
+                            ),
                             ground_truth_label,
-                            f"{ground_truth_label}_{true_class}",
-                        ],
-                        dtype={
-                            ground_truth_label: str,
-                            f"{ground_truth_label}_{true_class}": float,
-                        },
+                        )
                     )
-                    for p in pred_csvs
+                    > 0
                 ]
+                if not preds_dfs:
+                    raise ValueError(
+                        "No classification rows with ground truth available for plotting."
+                    )
 
                 y_trues = [
                     np.array(df[ground_truth_label] == true_class) for df in preds_dfs
diff --git a/src/stamp/statistics/categorical.py b/src/stamp/statistics/categorical.py
index e19b1659..90d27d21 100755
--- a/src/stamp/statistics/categorical.py
+++ b/src/stamp/statistics/categorical.py
@@ -20,6 +20,21 @@
     "count",
 ]
 
+_missing_ground_truth_tokens = frozenset(
+    {"", "na", "nan", "none", "null", "n/a", "#n/a", "#na", "?", "-", "--"}
+)
+
+
+def _drop_missing_ground_truth_rows(
+    preds_df: pd.DataFrame, target_label: str
+) -> pd.DataFrame:
+    """Remove rows whose ground truth is missing or encoded as a missing token."""
+    normalized_ground_truth = preds_df[target_label].astype("string").str.strip()
+    missing_ground_truth = normalized_ground_truth.isna() | (
+        normalized_ground_truth.str.lower().isin(_missing_ground_truth_tokens)
+    )
+    return preds_df.loc[~missing_ground_truth].copy()
+
 
 def _detect_targets_from_columns(columns: Sequence[str]) -> list[str]:
     """Detect target columns from CSV column names.
@@ -125,13 +140,19 @@ def categorical_aggregated_(
     calculate the mean and 95% confidence interval for all the scores as
     well as sum the total instane count for each class.
     """
-    preds_dfs = {
-        Path(p).parent.name: _categorical(
-            pd.read_csv(p, dtype=str).dropna(subset=[ground_truth_label]),
-            ground_truth_label,
+    preds_dfs = {}
+    for p in preds_csvs:
+        df = _drop_missing_ground_truth_rows(
+            pd.read_csv(p, dtype=str), ground_truth_label
         )
-        for p in preds_csvs
-    }
+        if len(df) > 0:
+            preds_dfs[Path(p).parent.name] = _categorical(df, ground_truth_label)
+
+    if not preds_dfs:
+        raise ValueError(
+            "No classification rows with ground truth available for statistics."
+        )
+
     preds_df = pd.concat(preds_dfs).sort_index()
     preds_df.to_csv(outpath / f"{ground_truth_label}_categorical-stats_individual.csv")
     stats_df = _aggregate_categorical_stats(preds_df.reset_index())
@@ -168,7 +189,7 @@ def categorical_aggregated_multitarget_(
         preds_dfs = {}
         for fold_name, df in csv_cache.items():
             # Drop rows where this target's ground truth is missing
-            df_clean = df.dropna(subset=[target_label])
+            df_clean = _drop_missing_ground_truth_rows(df, target_label)
             if len(df_clean) > 0:
                 preds_dfs[fold_name] = _categorical(df_clean, target_label)
 

From 6c3a93ca814b4b3935db1d9dd9857528832b2037 Mon Sep 17 00:00:00 2001
From: mducducd <minhducddd@gmail.com>
Date: Thu, 23 Apr 2026 10:09:10 +0100
Subject: [PATCH 2/4] chore: remove empty .codex file

---
 .codex | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 delete mode 100644 .codex

diff --git a/.codex b/.codex
deleted file mode 100644
index e69de29b..00000000

From 8afa7b8c6b452c409074060728b9700a6835e55d Mon Sep 17 00:00:00 2001
From: mducducd <minhducddd@gmail.com>
Date: Thu, 23 Apr 2026 10:14:42 +0100
Subject: [PATCH 3/4] revert: use plain dropna instead of missing-token helper in statistics

---
 src/stamp/statistics/__init__.py    | 27 ++++++++++++---------------
 src/stamp/statistics/categorical.py | 21 ++-------------------
 2 files changed, 14 insertions(+), 34 deletions(-)

diff --git a/src/stamp/statistics/__init__.py b/src/stamp/statistics/__init__.py
index ceae8a6a..04221aba 100644
--- a/src/stamp/statistics/__init__.py
+++ b/src/stamp/statistics/__init__.py
@@ -16,7 +16,6 @@
 from pydantic import BaseModel, ConfigDict, Field
 
 from stamp.statistics.categorical import (
-    _drop_missing_ground_truth_rows,
     categorical_aggregated_,
     categorical_aggregated_multitarget_,
 )
@@ -95,7 +94,7 @@ def _compute_multitarget_classification_stats(
         for p in pred_csvs:
             df = _read_table(p, dtype=str)
             # Only keep rows where this target has ground truth
-            df_clean = _drop_missing_ground_truth_rows(df, target_label)
+            df_clean = df.dropna(subset=[target_label])
             if len(df_clean) > 0:
                 preds_dfs.append(df_clean)
 
@@ -233,20 +232,18 @@ def compute_stats_(
                     df
                     for p in pred_csvs
                     if len(
-                        df := _drop_missing_ground_truth_rows(
-                            _read_table(
-                                p,
-                                usecols=[
-                                    ground_truth_label,
-                                    f"{ground_truth_label}_{true_class}",
-                                ],
-                                dtype={
-                                    ground_truth_label: str,
-                                    f"{ground_truth_label}_{true_class}": float,
-                                },
-                            ),
-                            ground_truth_label,
+                        df := _read_table(
+                            p,
+                            usecols=[
+                                ground_truth_label,
+                                f"{ground_truth_label}_{true_class}",
+                            ],
+                            dtype={
+                                ground_truth_label: str,
+                                f"{ground_truth_label}_{true_class}": float,
+                            },
                         )
+                        .dropna(subset=[ground_truth_label])
                     )
                     > 0
                 ]
diff --git a/src/stamp/statistics/categorical.py b/src/stamp/statistics/categorical.py
index 90d27d21..a267f5ca 100755
--- a/src/stamp/statistics/categorical.py
+++ b/src/stamp/statistics/categorical.py
@@ -20,21 +20,6 @@
     "count",
 ]
 
-_missing_ground_truth_tokens = frozenset(
-    {"", "na", "nan", "none", "null", "n/a", "#n/a", "#na", "?", "-", "--"}
-)
-
-
-def _drop_missing_ground_truth_rows(
-    preds_df: pd.DataFrame, target_label: str
-) -> pd.DataFrame:
-    """Remove rows whose ground truth is missing or encoded as a missing token."""
-    normalized_ground_truth = preds_df[target_label].astype("string").str.strip()
-    missing_ground_truth = normalized_ground_truth.isna() | (
-        normalized_ground_truth.str.lower().isin(_missing_ground_truth_tokens)
-    )
-    return preds_df.loc[~missing_ground_truth].copy()
-
 
 def _detect_targets_from_columns(columns: Sequence[str]) -> list[str]:
     """Detect target columns from CSV column names.
@@ -142,9 +127,7 @@ def categorical_aggregated_(
     """
     preds_dfs = {}
     for p in preds_csvs:
-        df = _drop_missing_ground_truth_rows(
-            pd.read_csv(p, dtype=str), ground_truth_label
-        )
+        df = pd.read_csv(p, dtype=str).dropna(subset=[ground_truth_label])
         if len(df) > 0:
             preds_dfs[Path(p).parent.name] = _categorical(df, ground_truth_label)
 
@@ -189,7 +172,7 @@ def categorical_aggregated_multitarget_(
         preds_dfs = {}
         for fold_name, df in csv_cache.items():
             # Drop rows where this target's ground truth is missing
-            df_clean = _drop_missing_ground_truth_rows(df, target_label)
+            df_clean = df.dropna(subset=[target_label])
             if len(df_clean) > 0:
                 preds_dfs[fold_name] = _categorical(df_clean, target_label)
 

From f60d21de05c280ddd2bd26912e24512f8839ff74 Mon Sep 17 00:00:00 2001
From: mducducd <minhducddd@gmail.com>
Date: Thu, 23 Apr 2026 10:17:27 +0100
Subject: [PATCH 4/4] format

---
 src/stamp/statistics/__init__.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/stamp/statistics/__init__.py b/src/stamp/statistics/__init__.py
index 04221aba..0a7eedef 100644
--- a/src/stamp/statistics/__init__.py
+++ b/src/stamp/statistics/__init__.py
@@ -242,8 +242,7 @@ def compute_stats_(
                                 ground_truth_label: str,
                                 f"{ground_truth_label}_{true_class}": float,
                             },
-                        )
-                        .dropna(subset=[ground_truth_label])
+                        ).dropna(subset=[ground_truth_label])
                     )
                     > 0
                 ]