Separate KNN ablation plots into individual per-class subplots and cast category values to str (fixes #46)

DWGodwin · DWGodwin · commit e5ce6b32fac3 · 2026-04-17T15:27:34.000-04:00
diff --git a/gelos/comp_plots.py b/gelos/comp_plots.py
@@ -122,76 +122,100 @@ def distance_matrix(
 def knn_purity_plot(
     metric_result: dict,
     output_path: str | Path = None,
+    class_labels: dict[str, str] | None = None,
     **kwargs,
 ) -> None:
     """Render KNN class purity comparison as line plots.
 
-    Creates a figure with two subplots:
-    - Top: overall purity vs k, one line per experiment
-    - Bottom: per-class purity vs k, faceted by experiment
+    Creates a figure with:
+    - Top row (full width): overall purity vs k, one line per experiment.
+    - Facet grid below: one subplot per class, each showing purity vs k with
+      one line per experiment — makes cross-model comparison direct.
 
     Args:
         metric_result: Output from ``knn_purity_comparison`` metric.
         output_path: Path to save the figure. Shows interactively if None.
+        class_labels: Optional mapping from class id (as string) to display
+            name. Used for facet subplot titles. Falls back to raw class id.
     """
     df = metric_result.get("comparison_df", pd.DataFrame())
     if df.empty:
         logger.warning("no data for KNN purity plot, skipping")
         return
 
+    class_labels = class_labels or {}
+
     overall = df[df["class"] == "overall"]
     per_class = df[df["class"] != "overall"]
-    experiments = overall["experiment"].unique()
-
-    fig, (ax_top, ax_bot) = plt.subplots(2, 1, figsize=(10, 8))
+    experiments = list(overall["experiment"].unique())
+    classes = sorted(per_class["class"].unique())
+    n_classes = len(classes)
 
-    # --- Top: overall purity ---
     markers = ["o", "s", "^", "D", "v", "P", "X", "*"]
+
+    # Layout: 1 row for overall, then a facet grid (up to 4 cols) for classes.
+    n_cols = min(4, n_classes) if n_classes else 1
+    n_facet_rows = (n_classes + n_cols - 1) // n_cols if n_classes else 0
+    fig_height = 4 + 2.5 * n_facet_rows
+    fig = plt.figure(figsize=(3.5 * n_cols, fig_height))
+    gs = fig.add_gridspec(1 + n_facet_rows, n_cols, hspace=0.5, wspace=0.3)
+
+    # --- Top: overall purity (spans all columns) ---
+    ax_top = fig.add_subplot(gs[0, :])
     for i, exp in enumerate(experiments):
         exp_data = overall[overall["experiment"] == exp].sort_values("k")
-        marker = markers[i % len(markers)]
-        ax_top.plot(exp_data["k"], exp_data["purity"], marker=marker, label=exp)
-
+        ax_top.plot(
+            exp_data["k"],
+            exp_data["purity"],
+            marker=markers[i % len(markers)],
+            label=exp,
+        )
     ax_top.set_xlabel("k")
     ax_top.set_ylabel("Purity")
     ax_top.set_ylim(0, 1.05)
     ax_top.set_title("Overall KNN Class Purity by Experiment")
     ax_top.legend()
     ax_top.grid(True, alpha=0.3)
 
-    # --- Bottom: per-class purity ---
-    classes = sorted(per_class["class"].unique())
-    n_classes = len(classes)
-    n_experiments = len(experiments)
-    k_values = sorted(per_class["k"].unique())
-    x = np.arange(len(k_values))
-    total_bars = n_classes * n_experiments
-    width = 0.8 / max(total_bars, 1)
+    # --- Facets: one subplot per class ---
+    facet_axes = []
+    for idx, cls in enumerate(classes):
+        row = 1 + idx // n_cols
+        col = idx % n_cols
+        ax = fig.add_subplot(gs[row, col])
+        facet_axes.append(ax)
 
-    for i, exp in enumerate(experiments):
-        for j, cls in enumerate(classes):
+        for i, exp in enumerate(experiments):
             subset = per_class[
                 (per_class["experiment"] == exp) & (per_class["class"] == cls)
             ].sort_values("k")
-            offset = (i * n_classes + j - total_bars / 2) * width + width / 2
-            bar_vals = [
-                subset[subset["k"] == k]["purity"].values[0]
-                if len(subset[subset["k"] == k]) > 0
-                else 0
-                for k in k_values
-            ]
-            ax_bot.bar(x + offset, bar_vals, width, label=f"{exp} — {cls}")
-
-    ax_bot.set_xlabel("k")
-    ax_bot.set_ylabel("Purity")
-    ax_bot.set_ylim(0, 1.05)
-    ax_bot.set_xticks(x)
-    ax_bot.set_xticklabels([str(k) for k in k_values])
-    ax_bot.set_title("Per-Class KNN Purity by Experiment")
-    ax_bot.legend(fontsize=7, ncol=2)
-    ax_bot.grid(True, alpha=0.3, axis="y")
-
-    fig.tight_layout()
+            if subset.empty:
+                continue
+            ax.plot(
+                subset["k"],
+                subset["purity"],
+                marker=markers[i % len(markers)],
+                label=exp,
+            )
+
+        display = class_labels.get(str(cls), str(cls))
+        ax.set_title(display)
+        ax.set_xlabel("k")
+        ax.set_ylabel("Purity")
+        ax.set_ylim(0, 1.05)
+        ax.grid(True, alpha=0.3)
+
+    # Single shared legend for facets, placed below the grid.
+    if facet_axes:
+        handles, labels = facet_axes[0].get_legend_handles_labels()
+        if handles:
+            fig.legend(
+                handles,
+                labels,
+                loc="lower center",
+                ncol=min(len(labels), 4),
+                bbox_to_anchor=(0.5, -0.02),
+            )
 
     if output_path:
         plt.savefig(output_path, dpi=300, bbox_inches="tight")
diff --git a/gelos/comparison.py b/gelos/comparison.py
@@ -38,6 +38,7 @@ class ComparisonContext:
     comp_plots: list[dict]
     output_dir: Path
     figures_dir: Path
+    class_labels: dict[str, str]
 
 
 def _resolve_embedding_path(exp: ComparisonExperiment, processed_data_dir: Path) -> Path:
@@ -85,6 +86,11 @@ def setup_comparison(
     comp_metrics = yaml_config.get("comp_metrics", [])
     comp_plots = yaml_config.get("comp_plots", [])
 
+    # Normalize class label keys to strings so they can be looked up against the
+    # CSV ``class`` column (which is string-typed on read).
+    raw_labels = yaml_config.get("class_labels", {}) or {}
+    class_labels = {str(k): str(v) for k, v in raw_labels.items()}
+
     output_dir = processed_data_dir / "comparisons" / config_stem
     output_dir.mkdir(exist_ok=True, parents=True)
     figures_dir = figures_base_dir / "comparisons" / config_stem
@@ -98,6 +104,7 @@ def setup_comparison(
         comp_plots=comp_plots,
         output_dir=output_dir,
         figures_dir=figures_dir,
+        class_labels=class_labels,
     )
 
 
@@ -123,8 +130,7 @@ def run_comparison(
 
     # Determine whether any metric requires loading embeddings
     any_needs_embeddings = any(
-        getattr(COMP_METRICS.get(m["type"]), "requires_embeddings", True)
-        for m in ctx.comp_metrics
+        getattr(COMP_METRICS.get(m["type"]), "requires_embeddings", True) for m in ctx.comp_metrics
     )
 
     # Build experiment lists: always build labels-only, load arrays only when needed
@@ -136,18 +142,15 @@ def run_comparison(
         for exp in ctx.experiments:
             emb_path = _resolve_embedding_path(exp, processed_data_dir)
             if not emb_path.exists():
-                logger.warning(
-                    f"embeddings not found for '{exp.label}' at {emb_path}, skipping"
-                )
+                logger.warning(f"embeddings not found for '{exp.label}' at {emb_path}, skipping")
                 continue
             emb = np.load(emb_path)
             loaded.append((exp.label, emb))
             logger.info(f"loaded embeddings for '{exp.label}': shape={emb.shape}")
 
         if len(loaded) < 2:
             logger.warning(
-                f"need at least 2 experiments with embeddings for comparison, "
-                f"got {len(loaded)}"
+                f"need at least 2 experiments with embeddings for comparison, got {len(loaded)}"
             )
             return {}
 
@@ -212,6 +215,7 @@ def run_comparison(
         p_fn(
             metric_results[source_metric],
             output_path=output_path,
+            class_labels=ctx.class_labels,
             **p_params,
         )
         logger.info(f"comparison plot saved to {output_path}")
diff --git a/gelos/plotting.py b/gelos/plotting.py
@@ -36,7 +36,7 @@ def scatter_2d(
     plot a 2d transform of embeddings colored according to chip category
     """
     category_column, color_dict, legend_patches = build_style_from_config(style_cfg)
-    colors = chip_gdf[category_column].loc[chip_indices].map(color_dict)
+    colors = chip_gdf[category_column].loc[chip_indices].astype(str).map(color_dict)
     transform_title = TRANSFORM_TITLES[t_type]
 
     fig = plt.figure(figsize=(10, 8))
@@ -63,9 +63,10 @@ def scatter_2d(
 def build_style_from_config(style_cfg: dict) -> tuple[str, dict, list[Patch]]:
     """Extract category_column, color_dict, and legend_patches from the style config section."""
     category_column = style_cfg["category_column"]
-    color_dict = style_cfg["colors"]
+    color_dict = {str(k): v for k, v in style_cfg["colors"].items()}
+    label_dict = {str(k): v for k, v in style_cfg["labels"].items()}
     legend_patches = [
-        Patch(color=color, label=style_cfg["labels"][k]) for k, color in color_dict.items()
+        Patch(color=color, label=label_dict[k]) for k, color in color_dict.items()
     ]
     return category_column, color_dict, legend_patches
 
@@ -83,7 +84,7 @@ def temporal_cosine_similarity(
     n_timesteps: int = 4,
     timestep_labels: list[str] | None = None,
     n_cols: int = 6,
-    ylim: tuple[float, float] = (0,1)
+    ylim: tuple[float, float] = (0.5,1)
 ) -> None:
     """Plot cosine similarity between consecutive timesteps per land-cover category.