Add capacity-scan.py with parse_table()

Implements the parse_table() function and a pytest test harness for the capacity-scan script. Tests cover header matching and the graceful empty return when the required header is absent.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-01 10:16:50 +02:00 · 2026-06-01 10:16:50 +02:00 · 07ecbb2789
commit 07ecbb2789
parent 3ea9109ba2
2 changed files with 73 additions and 0 deletions
--- a/scripts/capacity-scan.py
+++ b/scripts/capacity-scan.py
@ -0,0 +1,45 @@
+#!/usr/bin/env python3
+"""capacity-scan.py — deterministic capacity facts for /capacity-review.
+
+Python standard library only. Emits a JSON object to stdout.
+
+Reads physical capacities and workload allocations from the machine-readable
+tables in docs/hardware/reference.md, computes per-node allocated-vs-physical
+rollups, and cross-checks workload hostnames against `terraform output -json`
+and `ansible-inventory --list` to surface drift. Degrades gracefully when
+nothing is provisioned. Live usage stats are a documented future hook.
+
+Usage: python3 scripts/capacity-scan.py [--env staging] [--reference PATH]
+"""
+import argparse
+import json
+import os
+import subprocess
+import sys
+
+REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+def _split_row(text):
+    """Split one pipe-delimited table line into whitespace-stripped cells."""
+    text = text.strip()
+    # Remove exactly one outer pipe per side. str.strip("|") would also eat
+    # the pipes of empty edge cells ("| a | b ||"), changing the cell count
+    # and causing the row to be dropped as a mismatch.
+    if text.startswith("|"):
+        text = text[1:]
+    if text.endswith("|"):
+        text = text[:-1]
+    return [cell.strip() for cell in text.split("|")]
+
+
+def _is_separator_row(text):
+    """Return True if text is a GFM delimiter row such as |---|:--:|."""
+    if not text.strip().startswith("|"):
+        return False
+    # Every cell must be dashes with optional alignment colons.
+    return all(
+        "-" in cell and set(cell) <= {"-", ":"}
+        for cell in _split_row(text)
+    )
+
+
+def parse_table(markdown, required_cols):
+    """Return the rows of the first markdown table matching required_cols.
+
+    Scans for the first line starting with "|" whose cells include every
+    name in required_cols and treats it as the table header. The lines that
+    follow, up to the first line that does not start with "|", are the body.
+
+    Args:
+        markdown: Markdown text to scan.
+        required_cols: Iterable of header names that must all be present.
+
+    Returns:
+        A list of dicts, one per body row, keyed by header name with the
+        whitespace-stripped cell strings as values. Empty cells are kept as
+        "". Rows whose cell count does not match the header are skipped.
+        Returns [] when no header matches.
+    """
+    lines = markdown.splitlines()
+    required = set(required_cols)
+    for i, raw in enumerate(lines):
+        if not raw.strip().startswith("|"):
+            continue
+        headers = _split_row(raw)
+        if not required.issubset(headers):
+            continue
+        body = lines[i + 1:]
+        # Skip the delimiter row only when it is really there; skipping one
+        # line unconditionally loses the first data row of a table written
+        # without it.
+        if body and _is_separator_row(body[0]):
+            body = body[1:]
+        rows = []
+        for line in body:
+            if not line.strip().startswith("|"):
+                break
+            cells = _split_row(line)
+            if len(cells) == len(headers):
+                rows.append(dict(zip(headers, cells)))
+        return rows
+    return []
--- a/tests/test_capacity_scan.py
+++ b/tests/test_capacity_scan.py
@ -0,0 +1,28 @@
+import importlib.util
+import pathlib
+
+# The script's file name contains a hyphen, so it cannot be loaded with a
+# plain `import` statement; build the module from its file path instead.
+_PATH = pathlib.Path(__file__).resolve().parent.parent.joinpath(
+    "scripts", "capacity-scan.py"
+)
+_spec = importlib.util.spec_from_file_location("capacity_scan", _PATH)
+cs = importlib.util.module_from_spec(_spec)
+# Run the script's top-level code so its functions are available on `cs`.
+_spec.loader.exec_module(cs)
+
+
+def test_parse_table_keys_on_header_and_ignores_extra_cols():
+    """Rows are keyed by header name; a column beyond the required ones
+    ("notes") does not prevent the match and is kept in each row dict."""
+    md = """
+intro text
+| node | cores | ram_gb | disk_gb | notes |
+|------|-------|--------|---------|-------|
+| pve0 | 20    | 64     | 4000    | nvme  |
+| pve1 | 20    | 64     | 4000    | nvme  |
+
+trailing text
+"""
+    required = ["node", "cores", "ram_gb", "disk_gb"]
+    # Both fixture rows differ only in the node name.
+    expected = [
+        {"node": name, "cores": "20", "ram_gb": "64", "disk_gb": "4000", "notes": "nvme"}
+        for name in ("pve0", "pve1")
+    ]
+    assert cs.parse_table(md, required) == expected
+
+
+def test_parse_table_returns_empty_when_header_absent():
+    """Text containing no table at all yields an empty list."""
+    rows = cs.parse_table("no tables here", ["node", "cores"])
+    assert rows == []