From b33130eea9e4bd13673055d9f77c4214525fa4ce Mon Sep 17 00:00:00 2001 From: sjat Date: Sat, 30 May 2026 18:56:01 +0200 Subject: [PATCH] Add /review-repo command with deterministic pre-scan and reviews store New on-demand repo audit: scripts/repo-scan.py does the cheap deterministic checks (markers, broken refs, unencrypted vaults) and inventory; the command fans out judgement reviewers across four dimensions, applies only safe/obvious fixes, and writes a tracked report to docs/reviews/. Cron + email deferred. Co-Authored-By: Claude Opus 4.8 (1M context) --- .claude/commands/review-repo.md | 89 +++++++++++++++++++ CLAUDE.md | 1 + STATUS.md | 1 + docs/reviews/README.md | 25 ++++++ scripts/repo-scan.py | 146 ++++++++++++++++++++++++++++++++ 5 files changed, 262 insertions(+) create mode 100644 .claude/commands/review-repo.md create mode 100644 docs/reviews/README.md create mode 100644 scripts/repo-scan.py diff --git a/.claude/commands/review-repo.md b/.claude/commands/review-repo.md new file mode 100644 index 0000000..c2401a6 --- /dev/null +++ b/.claude/commands/review-repo.md @@ -0,0 +1,89 @@ +# Review the repo for staleness, drift, and contradictions + +Audit the whole repo across four dimensions, apply only the safe/obvious fixes, +report the rest, and write a tracked report to `docs/reviews/`. + +## Dimensions (equal weight) + +1. **Cruft / staleness** — code, comments, or notes left behind after the thing they + addressed was resolved; commented-out blocks; done-but-undeleted TODOs; orphans. +2. **Design conformance** — does the repo obey the ADRs and CLAUDE.md conventions + (FQCN, tags, `rolename__var`, nested vault, Makefile-as-interface, etc.)? +3. **Consistency & intent** — contradictions (doc↔doc and doc↔code), overlaps, + duplication, and whether intent is actually documented. +4. **Docs-vs-reality drift** — do `STATUS.md` and the ADRs match what is actually + built? + +## Rubric (the source of truth to measure against) + +- `docs/decisions/` (ADRs) — the design decisions. +- `CLAUDE.md` — the conventions. +- `STATUS.md` — what is actually built vs only planned. + +## Process + +### Phase 0 — deterministic pre-scan +Run `python3 scripts/repo-scan.py > /tmp/repo-scan.json`. It returns the **inventory** +(roles, ADRs, runbooks, playbooks, scripts — your shard list) and **exact findings** +(markers, broken refs, unencrypted vaults). Fold these into the report verbatim. + +### Phase 1 — fan-out judgement review +Scale to repo size: +- **Small** (≤ ~10 roles, like boma today): a few sub-agents, or one pass per area. +- **Large** (many roles/docs — heading toward AnsibleBaobabV4's 100+ roles): fan out + **one reviewer per shard** (role / doc-cluster / ADR group) in parallel via the + Agent tool, or the Workflow orchestrator for the biggest sweeps. + +Give each reviewer: its slice **plus the rubric** and the four dimensions. Each +returns structured findings — `{dimension, severity (high|medium|low), +location (file:line), description, suggested_fix, auto_fixable (bool)}`. + +### Phase 2 — synthesis +- Merge and dedupe all findings (deterministic + reviewer). +- Run **one cross-cutting reviewer** over the full ADR set + `STATUS.md` + `CLAUDE.md` + to catch contradictions that span files (per-shard agents can't see these). +- Diff against the previous run's `docs/reviews/-findings.json` and tag each + finding **new / recurring / resolved**. +- Prioritise by severity; split into auto-fixable vs report-only. + +### Phase 3 — act +- Apply **only** the safe/obvious fixes (allow-list below). Run `make lint` after; + revert any fix that breaks it. +- Write the report and machine findings (see Output) and generate a follow-up prompt. +- Commit per CLAUDE.md git conventions (small/safe → `main`; a larger batch → a + branch, diff shown first). Summarise to the user: counts, what was auto-fixed, the + top open findings, and the follow-up prompt. + +## Auto-fix allow-list — safe and obvious ONLY + +**Allowed:** typos in prose/comments; broken cross-reference paths (repoint to the +correct existing path); dead commented-out blocks with no explanatory value; +obviously-stale comments that contradict the code they annotate; trivial formatting; +updating a doc reference to a renamed file. + +**Never:** anything that changes runtime behaviour, logic, variables, or task order; +anything touching secrets, vault files, or credentials; anything where intent is +ambiguous or the answer is a judgement call; deleting files; resolving a +contradiction between two ADRs (that is a design decision — report it). + +**When unsure, REPORT — do not fix.** + +## Output (written to `docs/reviews/`) + +- `docs/reviews/-review.md` — human report. +- `docs/reviews/-findings.json` — machine-readable (next run's diff + the + cron email payload). +- `docs/reviews/latest.md` — overwrite with a copy of the newest report. + +Report contents: run metadata (date, reviewed commit SHA); summary counts by +dimension/severity; auto-fixes applied (with file list); open findings (prioritised, +grouped by dimension, each with location + suggested fix + new/recurring tag); and a +generated, copy-pasteable **follow-up prompt**. See `docs/reviews/README.md`. + +## Headless / cron (future — `docs/todo.md` "Scheduled work") + +When run non-interactively (`claude -p`, e.g. a fortnightly cron): **report only** — +do not apply fixes or push to `main`. Write the report artifact and email its summary ++ the follow-up prompt to the maintainer via the machine's email skill (other +projects on this host already use it). The report file is the email payload. Validate +the headless email path when wiring the cron job. diff --git a/CLAUDE.md b/CLAUDE.md index ba498a7..685d854 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -30,6 +30,7 @@ Full design rationale: `docs/decisions/` | Check mode (dry run) | `make check PLAYBOOK=` | | Deploy a playbook | `make deploy PLAYBOOK=` | | Scaffold a new role | `make new-role NAME=` | +| Review repo for drift/cruft | `/review-repo` (Claude command) | | Encrypt a vault file | `make encrypt FILE=` | | Decrypt a vault file | `make decrypt FILE=` | | Install Python deps | `make setup` | diff --git a/STATUS.md b/STATUS.md index a5d43b3..971b3b0 100644 --- a/STATUS.md +++ b/STATUS.md @@ -19,6 +19,7 @@ _Last reviewed: 2026-05-30._ | `git` | Initialized, trunk-based on `main`, pushed to `origin` (`forgejo.nyumbani.baobab.band:7577`). | | Pre-commit hooks | Configured: lint, gitleaks, vault-encryption guard. Activate with `pre-commit install` after `make setup`. | | Vault password client | `scripts/vault-pass-client.sh` fetches the master password from Vaultwarden via `rbw` (wired as `vault_password_file`). Requires `rbw` installed + `rbw unlock`. | +| `/review-repo` | Repo audit: `scripts/repo-scan.py` (Phase 0) + `.claude/commands/review-repo.md`, reports to `docs/reviews/`. On-demand only; cron + email deferred (`docs/todo.md`). | | Terraform HCL (`terraform/`) | Written (proxmox VM module + envs) — but never run; see below | ## Scaffolded but empty — NOT implemented diff --git a/docs/reviews/README.md b/docs/reviews/README.md new file mode 100644 index 0000000..01c53d1 --- /dev/null +++ b/docs/reviews/README.md @@ -0,0 +1,25 @@ +# docs/reviews/ + +Tracked output of the `/review-repo` command (one set of files per run). This is an +**audit trail** — committed, not hand-edited. The command writes these files; don't +edit them yourself. + +## Files per run + +| File | Purpose | +|---|---| +| `-review.md` | Human-readable report | +| `-findings.json` | Machine-readable findings — used to diff new/recurring/resolved on the next run, and as the cron email payload | +| `latest.md` | A copy of the most recent report (stable path for quick reference / email) | + +## What a report contains + +- **Run metadata** — date and the commit SHA reviewed. +- **Summary** — finding counts by dimension and severity. +- **Auto-fixes applied** — what the run fixed (safe/obvious only), with a file list. +- **Open findings** — prioritised, grouped by dimension; each with a location, a + suggested fix, and a `new` / `recurring` / `resolved` tag (vs the previous run). +- **Follow-up prompt** — a copy-pasteable prompt to act on the open findings. + +The four review dimensions and the auto-fix safety rules live in +`.claude/commands/review-repo.md`. diff --git a/scripts/repo-scan.py b/scripts/repo-scan.py new file mode 100644 index 0000000..3ce3f9e --- /dev/null +++ b/scripts/repo-scan.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +""" +Phase-0 deterministic pre-scan for the /review-repo command. + +Python standard library only. Emits a JSON object to stdout: + - inventory: roles, adrs, runbooks, playbooks, scripts (the shard list) + - findings: exact, no-judgement issues (markers, broken refs, unencrypted vaults) + +The *judgement* review — contradictions, design-conformance, stale intent — is done +by the /review-repo fan-out reviewers, NOT here. This script only catches the cheap, +exact things so the reviewers can focus on what needs reasoning. + +Usage: python3 scripts/repo-scan.py [repo_root] > scan.json +""" +import json +import os +import re +import sys + +ROOT = os.path.abspath(sys.argv[1] if len(sys.argv) > 1 else ".") + +PRUNE = {".git", ".venv", ".collections", ".ansible", ".worktrees", + ".pytest_cache", "node_modules", "__pycache__"} +SKIP_PREFIX = os.path.join("docs", "reviews") # don't scan our own reports +SOURCE_EXTS = {".yml", ".yaml", ".j2", ".py", ".sh", ".md", ".tf", ".cfg", ".ini"} + +MARKER_RE = re.compile(r"(?*${}") + + +def walk_files(): + for dirpath, dirnames, filenames in os.walk(ROOT): + dirnames[:] = [d for d in dirnames if d not in PRUNE] + for f in filenames: + yield os.path.join(dirpath, f) + + +def rel(path): + return os.path.relpath(path, ROOT) + + +def inventory(): + def listdir(*parts, want_dirs=False, suffixes=None): + d = os.path.join(ROOT, *parts) + if not os.path.isdir(d): + return [] + out = [] + for e in sorted(os.listdir(d)): + full = os.path.join(d, e) + if want_dirs and not os.path.isdir(full): + continue + if suffixes and not e.endswith(suffixes): + continue + out.append(e) + return out + + return { + "roles": listdir("roles", want_dirs=True), + "adrs": listdir("docs", "decisions", suffixes=(".md",)), + "runbooks": listdir("docs", "runbooks", suffixes=(".md",)), + "playbooks": listdir("playbooks", suffixes=(".yml", ".yaml")), + "scripts": listdir("scripts"), + } + + +def adr_numbers(): + dec = os.path.join(ROOT, "docs", "decisions") + nums = set() + if os.path.isdir(dec): + for f in os.listdir(dec): + m = re.match(r"(\d{3})-", f) + if m: + nums.add(m.group(1)) + return nums + + +def scan(): + findings = [] + adrs = adr_numbers() + for path in walk_files(): + rpath = rel(path) + if rpath.startswith(SKIP_PREFIX): + continue + name = os.path.basename(path) + + if name == "vault.yml": + try: + text = open(path, encoding="utf-8", errors="replace").read() + except OSError: + continue + if not text.startswith("$ANSIBLE_VAULT"): + real = [ln for ln in text.splitlines() + if ln.strip() and not ln.lstrip().startswith("#") and ln.strip() != "---"] + if real: + findings.append({"check": "vault-unencrypted", "severity": "high", + "path": rpath, "line": 1, + "detail": "vault.yml is not ansible-vault encrypted but has content"}) + continue + + if os.path.splitext(path)[1] not in SOURCE_EXTS: + continue + try: + lines = open(path, encoding="utf-8", errors="replace").readlines() + except OSError: + continue + + for i, line in enumerate(lines, 1): + markers = sorted(set(m.group(1) for m in MARKER_RE.finditer(line))) + if markers: + findings.append({"check": "marker", "severity": "low", "path": rpath, + "line": i, "detail": f"{'/'.join(markers)}: {line.strip()[:120]}"}) + for m in ADR_REF_RE.finditer(line): + if m.group(1) not in adrs: + findings.append({"check": "broken-adr-ref", "severity": "medium", "path": rpath, + "line": i, "detail": f"references ADR-{m.group(1)} (no such file)"}) + for m in PATH_REF_RE.finditer(line): + # Skip paths that are part of a URL (e.g. a backend API endpoint). + token_prefix = line[max(line.rfind(" ", 0, m.start()), + line.rfind("\t", 0, m.start())) + 1:m.start()] + if "://" in token_prefix: + continue + ref = m.group(0).rstrip(".,);:`'\"") + if any(c in ref for c in PLACEHOLDER): + continue + if not os.path.exists(os.path.join(ROOT, ref)): + findings.append({"check": "broken-path-ref", "severity": "medium", "path": rpath, + "line": i, "detail": f"references '{ref}' which does not exist"}) + return findings + + +def main(): + result = {"root": ROOT, "inventory": inventory(), "findings": scan()} + json.dump(result, sys.stdout, indent=2) + sys.stdout.write("\n") + counts = {} + for f in result["findings"]: + counts[f["check"]] = counts.get(f["check"], 0) + 1 + summary = ", ".join(f"{k}={v}" for k, v in sorted(counts.items())) or "no deterministic findings" + print(f"repo-scan: {len(result['inventory']['roles'])} roles, " + f"{len(result['inventory']['adrs'])} ADRs; {summary}", file=sys.stderr) + + +if __name__ == "__main__": + main()