boma/scripts/capacity-scan.py

#!/usr/bin/env python3
"""capacity-scan.py — deterministic capacity facts for /capacity-review.

Python standard library only. Emits a JSON object to stdout.

Reads physical capacities and workload allocations from the machine-readable
tables in docs/hardware/reference.md, computes per-node allocated-vs-physical
rollups, and cross-checks workload hostnames against `terraform output -json`
and `ansible-inventory --list` to surface drift. Degrades gracefully when
nothing is provisioned. Live usage stats are a documented future hook.

Usage: python3 scripts/capacity-scan.py [--env staging] [--reference PATH]
"""
import argparse
import json
import os
import subprocess
import sys

REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))


def _split_row(line):
    """Split one markdown table row into a list of stripped cell strings.

    At most one leading and one trailing pipe are removed. Using
    str.strip("|") here would swallow runs of pipes and thereby drop empty
    edge cells (the last cell of `|a|b||` would vanish).
    """
    text = line.strip()
    if text.startswith("|"):
        text = text[1:]
    if text.endswith("|"):
        text = text[:-1]
    return [cell.strip() for cell in text.split("|")]


def _is_separator_row(cells):
    """Return True when every cell looks like a GFM delimiter cell
    (dashes with optional alignment colons, e.g. ---, :---, ---:, :-:)."""
    return bool(cells) and all(
        cell and "-" in cell and set(cell) <= {"-", ":"} for cell in cells
    )


def parse_table(markdown, required_cols):
    """Return row dicts for the first markdown table whose header contains all
    required_cols.

    markdown is the document text; required_cols is an iterable of header
    names that must all be present in the header row (extra columns are
    allowed). Keys of each returned dict are header names; values are the raw
    cell strings with surrounding whitespace stripped (an empty cell yields
    an empty string).

    Rows whose cell count does not match the header are skipped. The table
    ends at the first line that does not start with a pipe. Returns [] when
    no header matches.
    """
    lines = markdown.splitlines()
    required = set(required_cols)
    for index, raw in enumerate(lines):
        if not raw.strip().startswith("|"):
            continue
        headers = _split_row(raw)
        if not required.issubset(headers):
            continue

        # Skip the GFM separator row (|---|---|) only when it is really
        # there, so a table written without one does not lose its first
        # data row and we never jump over a non-table line.
        body_start = index + 1
        if body_start < len(lines):
            candidate = lines[body_start]
            if candidate.strip().startswith("|") and _is_separator_row(
                _split_row(candidate)
            ):
                body_start += 1

        rows = []
        for body in lines[body_start:]:
            if not body.strip().startswith("|"):
                break  # first non-table line ends the table
            cells = _split_row(body)
            if len(cells) == len(headers):
                rows.append(dict(zip(headers, cells)))
        return rows
    return []
Add capacity-scan.py with parse_table() Implements the parse_table() function and pytest test harness for the capacity-scan script. Tests cover header matching and graceful empty return when the required header is absent. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> 2026-06-01 10:16:50 +02:00			`#!/usr/bin/env python3`
			`"""capacity-scan.py — deterministic capacity facts for /capacity-review.`

			`Python standard library only. Emits a JSON object to stdout.`

			`Reads physical capacities and workload allocations from the machine-readable`
			`tables in docs/hardware/reference.md, computes per-node allocated-vs-physical`
			rollups, and cross-checks workload hostnames against `terraform output -json`
			and `ansible-inventory --list` to surface drift. Degrades gracefully when
			`nothing is provisioned. Live usage stats are a documented future hook.`

			`Usage: python3 scripts/capacity-scan.py [--env staging] [--reference PATH]`
			`"""`
			`import argparse`
			`import json`
			`import os`
			`import subprocess`
			`import sys`

			`REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))`


			`def parse_table(markdown, required_cols):`
			`"""Return row dicts for the first markdown table whose header contains all`
			`required_cols. Keys are header names; values are raw cell strings.`
			`Rows whose cell count does not match the header are skipped."""`
			`lines = markdown.splitlines()`
			`required = set(required_cols)`
			`for i, raw in enumerate(lines):`
			`line = raw.strip()`
			`if not line.startswith("\|"):`
			`continue`
			`headers = [c.strip() for c in line.strip("\|").split("\|")]`
			`if not required.issubset(set(headers)):`
			`continue`
			`rows = []`
			`# i + 2 skips the header's GFM separator row (\|---\|---\|)`
			`for body in lines[i + 2:]:`
			`if not body.strip().startswith("\|"):`
			`break`
			`cells = [c.strip() for c in body.strip().strip("\|").split("\|")]`
			`if len(cells) == len(headers):`
			`rows.append(dict(zip(headers, cells)))`
			`return rows`
			`return []`