|
18 | 18 |
|
19 | 19 | import json |
20 | 20 | import os |
| 21 | +import re |
21 | 22 | import time |
22 | 23 | from datetime import datetime, timezone |
23 | 24 | from typing import Any, Dict, List |
@@ -105,7 +106,25 @@ def _print_confusion_report(label: str, metrics: dict, wrong: list) -> None: |
105 | 106 | def _save_confusion_results(label: str, metrics: dict, wrong: list, rows: list) -> dict: |
106 | 107 | """Save confusion matrix results to a JSON file and return the result dict.""" |
107 | 108 | os.makedirs(RESULTS_DIR, exist_ok=True) |
108 | | - safe_label = label.lower().replace(" ", "_").replace("—", "-") |
| 109 | + # Build a short, filesystem-safe filename from the label. |
| 110 | + # Full label is preserved inside the JSON; filename just needs to be |
| 111 | + # unique and recognisable. Format: {topic}_cf.json for ContentFilter, else {topic}_{method}[_{qualifier}].json |
| 112 | + parts = label.split("\u2014") |
| 113 | + topic = parts[0].strip().lower().replace("block ", "").replace(" ", "_") |
| 114 | + method_full = parts[1].strip() if len(parts) > 1 else "" |
| 115 | + method_name = re.sub(r"\s*\(.*?\)", "", method_full).strip().lower() |
| 116 | + qualifier_match = re.search(r"\(([^)]+)\)", method_full) |
| 117 | + qualifier = qualifier_match.group(1) if qualifier_match else "" |
| 118 | + qualifier = re.sub(r"\.[a-z]+$", "", qualifier) # drop .yaml etc. |
| 119 | + if method_name == "contentfilter": |
| 120 | + safe_label = f"{topic}_cf" |
| 121 | + elif qualifier: |
| 122 | + safe_label = f"{topic}_{method_name}_{qualifier}" |
| 123 | + else: |
| 124 | + safe_label = f"{topic}_{method_name}" |
| 125 | + safe_label = safe_label.replace(" ", "_") |
| 126 | + safe_label = re.sub(r"[^a-z0-9_.\-]", "", safe_label) |
| 127 | + safe_label = re.sub(r"_+", "_", safe_label).strip("_") |
109 | 128 | result = { |
110 | 129 | "label": label, |
111 | 130 | "timestamp": datetime.now(timezone.utc).isoformat(), |
|
0 commit comments