From 3dd03d4198c91aefefe84491d3185989f350ef4f Mon Sep 17 00:00:00 2001 From: sjat Date: Fri, 5 Jun 2026 18:24:39 +0200 Subject: [PATCH] review-repo: 2026-06-05 report (4 auto-fixed, 12 open) Stale-deferred check exercised: 6 open-deferred-items all confirmed genuinely open, 0 stale-deferred. Top open: thread ADR-017 VERIFY.md convention through ADR-004/new-role/README; name the askari inventory group (ADR-016). Co-Authored-By: Claude Opus 4.8 (1M context) --- docs/reviews/2026-06-05-findings.json | 88 ++++++++++++++++++++++ docs/reviews/2026-06-05-review.md | 93 +++++++++++++++++++++++ docs/reviews/latest.md | 102 ++++++++++++++++++++++---- 3 files changed, 267 insertions(+), 16 deletions(-) create mode 100644 docs/reviews/2026-06-05-findings.json create mode 100644 docs/reviews/2026-06-05-review.md diff --git a/docs/reviews/2026-06-05-findings.json b/docs/reviews/2026-06-05-findings.json new file mode 100644 index 0000000..12dd6a1 --- /dev/null +++ b/docs/reviews/2026-06-05-findings.json @@ -0,0 +1,88 @@ +{ + "date": "2026-06-05", + "reviewed_commit": "f566fd1", + "fixes_commit": "666ad42", + "mode": "on-demand", + "counts": { + "auto_fixed": 4, + "open": 12, + "scan": {"broken-path-ref": 14, "marker": 35, "open-deferred-item": 6, "stale-deferred": 0} + }, + "auto_fixed": [ + {"id": "AF1", "dimension": "consistency", "severity": "high", + "location": "docs/decisions/005-bootstrapping.md:36; docs/runbooks/new-host.md:62,71", + "description": "Terraform 'writes the host's DNS A record' contradicts ADR-009 (dns role owns the zone)", + "fix": "removed the DNS-write clause; noted Terraform writes no DNS records", + "tag": "recurring"}, + {"id": "AF2", "dimension": "consistency", "severity": "high", + "location": "docs/decisions/005-bootstrapping.md:8", + "description": "control node described as cloned from the cloud-init template; ADR-015 makes ubongo physical", + "fix": "control node is a physical box installed directly, not cloned (ADR-015)", + "tag": "new"}, + {"id": "AF3", "dimension": "consistency", "severity": "low", + "location": "CLAUDE.md:197", + "description": "Further reading missing the VERIFY.md template row", + "fix": "added docs/testing/service-verify-template.md row", + "tag": "new"}, + {"id": "AF4", "dimension": "cruft", "severity": "low", + "location": "docs/TODO.md:79", + "description": "typos: 'we we', 'seperate'", + "fix": "corrected to 'we' and 'separate'", + "tag": "new"} + ], + "open": [ + {"id": "O1", "dimension": "consistency", "severity": "medium", + "location": "docs/decisions/004-docker-model.md", + "description": "service-role standard file table lists SECURITY.md but not VERIFY.md (ADR-017/CLAUDE.md:85 mandate it)", + "suggested_fix": "add a VERIFY.md row to ADR-004's file table", "tag": "new"}, + {"id": "O2", "dimension": "consistency", "severity": "medium", + "location": "docs/runbooks/new-role.md", + "description": "no step to write VERIFY.md for service roles; STATUS.md:17 'runbooks reconciled' now overstated", + "suggested_fix": "add a VERIFY.md step mirroring the SECURITY.md step", "tag": "new"}, + {"id": "O3", "dimension": "cruft", "severity": "low", + "location": "README.md:58-60,94", + "description": "ADR list stops at 001-009; docs/ tree omits security/, testing/, hardware/", + "suggested_fix": "extend ADR list + docs/ subtree", "tag": "new"}, + {"id": "O4", "dimension": "consistency", "severity": "medium", + "location": "CLAUDE.md:106; docs/decisions/009-provisioning-handoff.md:78; scripts/tf_to_inventory.py:24", + "description": "ADR-016 says askari gets its own inventory group but none is named; valid-groups set excludes it", + "suggested_fix": "name the group; add to host-groups + ADR-009 valid groups", "tag": "new"}, + {"id": "O5", "dimension": "consistency", "severity": "medium", + "location": "docs/decisions/006-terraform.md:78", + "description": "backend.tf labelled 'Forgejo state backend' contradicts ADR-006's own local-state section", + "suggested_fix": "relabel to local state backend (no remote backend)", "tag": "new"}, + {"id": "O6", "dimension": "drift", "severity": "medium", + "location": "docs/decisions/014-knowledge-sourcing.md:88", + "description": "plugin reproducibility described as open, but TODO 10.7 is DONE", + "suggested_fix": "update to resolved state; drop the forward-pointer", "tag": "new"}, + {"id": "O7", "dimension": "consistency", "severity": "low", + "location": "docs/decisions/011-update-management.md:128", + "description": "ruled-out 'Digest-pinning the stateful tier' contradicts Decision #2 (adopts tag@digest); ADR-011 is draft", + "suggested_fix": "remove/replace the ruled-out row when accepting ADR-011 (TODO 16)", "tag": "new"}, + {"id": "O8", "dimension": "consistency", "severity": "low", + "location": "docs/decisions/003-toolchain.md:85; docs/decisions/010-forgejo-ci.md:66", + "description": "'act_runner on control node or a dedicated runner VM' ambiguous vs ADR-015", + "suggested_fix": "name ubongo as runner host; cross-ref ADR-015", "tag": "new"}, + {"id": "O9", "dimension": "consistency", "severity": "low", + "location": "docs/decisions/008-testing.md:148", + "description": "WireGuard Molecule-exclusion row framed for retired OPNsense VLAN-99 WireGuard", + "suggested_fix": "reframe to NetBird wt0 data plane (ADR-016)", "tag": "new"}, + {"id": "O10", "dimension": "consistency", "severity": "low", + "location": "docs/decisions/011-update-management.md:67", + "description": "cross-refs 'scheduled_jobs plan and ADR-010'; ADR-010 has no such plan (TODO 8.3)", + "suggested_fix": "point to TODO 8.3", "tag": "new"}, + {"id": "O11", "dimension": "consistency", "severity": "low", + "location": "docs/CAPABILITIES.md", + "description": "no row for the /verify-service (Level 4) capability decided in ADR-017", + "suggested_fix": "add an Operations row for /verify-service", "tag": "new"}, + {"id": "O12", "dimension": "cruft", "severity": "low", + "location": "docs/TODO.md:30", + "description": "item 3.10 is garbled/unfollowable", + "suggested_fix": "rewrite clearly or strike", "tag": "new"} + ], + "scan_noise": [ + "broken-path-ref x14: illustrative report-name templates (YYYY-MM-DD-.md) and not-yet-created latest.md files; scanner stops at the boundary", + "marker x35: mostly prose references to TODO.md items, not code markers", + "open-deferred-item x6: all confirmed genuinely open (ADR-011 #1-5, ADR-015 #3); 0 stale-deferred" + ] +} diff --git a/docs/reviews/2026-06-05-review.md b/docs/reviews/2026-06-05-review.md new file mode 100644 index 0000000..61f43be --- /dev/null +++ b/docs/reviews/2026-06-05-review.md @@ -0,0 +1,93 @@ +# Repo review — 2026-06-05 + +- **Reviewed commit:** `f566fd1` (scan); auto-fixes landed in `666ad42` +- **Mode:** on-demand (interactive) +- **Scope:** whole repo — 2 roles, 17 ADRs, 4 runbooks, 7 scripts; doc-heavy +- **Prior run:** 2026-05-30 (`de38d1c`) — 7 auto-fixed, 17 open + +## Summary + +| | high | medium | low | total | +|---|---|---|---|---| +| Auto-fixed | 2 | 0 | 2 | 4 | +| Open (report-only) | 0 | 5 | 7 | 12 | + +This review followed a session of heavy documentation work (ADR-015 `ubongo`, +ADR-016 NetBird mesh, ADR-017 Level-4 verification). Most findings are **propagation +gaps** — a new decision landed but an older doc still reflects the prior design. + +**New deferral check exercised.** `repo-scan.py` now enumerates open ADR "Deferred/ +Open" items and flags any another file calls resolved-but-unmarked. This run: 6 +open-deferred-items surfaced, **all confirmed genuinely open** by the cross-cutting +reviewer (ADR-011 #1–5, ADR-015 #3), **0 stale-deferred**. The check produced no false +resolutions and the judgement layer agreed — working as designed. + +## Auto-fixes applied (`666ad42`) + +| id | dim | sev | location | fix | +|---|---|---|---|---| +| AF1 | consistency | high | `docs/decisions/005-bootstrapping.md:36`, `docs/runbooks/new-host.md:62,71` | Removed "Terraform writes the host's DNS A record" — contradicts ADR-009 (the `dns` role owns the zone). **Recurring**: the 2026-05-30 run fixed the same contradiction in README/ADR-003; it reappeared in two more files. | +| AF2 | consistency | high | `docs/decisions/005-bootstrapping.md:8` | Control node described as cloned from the cloud-init template; ADR-015 makes `ubongo` a physical box installed directly. Corrected. | +| AF3 | consistency | low | `CLAUDE.md:197` | Added the missing `docs/testing/service-verify-template.md` row to Further reading (parallels the security-template row). | +| AF4 | cruft | low | `docs/TODO.md:79` | Typos: "we we" → "we"; "seperate" → "separate". | + +## Open findings (report-only) + +### VERIFY.md propagation cluster (ADR-017 not fully threaded through) + +| id | sev | location | finding | suggested fix | +|---|---|---|---|---| +| O1 | medium | `docs/decisions/004-docker-model.md` (file table) | The service-role standard lists `SECURITY.md` but not `VERIFY.md`, though ADR-017 + CLAUDE.md:85 now mandate it. | Add a `VERIFY.md` row to ADR-004's file table. | +| O2 | medium | `docs/runbooks/new-role.md` (step 9 → Commit) | No step to write `VERIFY.md` for service roles (only `SECURITY.md`). Makes `STATUS.md:17` ("runbooks current and mutually reconciled") slightly overstated. | Add a "write the per-service verification spec" step mirroring the SECURITY.md step. | +| O3 | low | `README.md:58-60, 94` | ADR list stops at 001–009 (010–017 absent); the `docs/` tree omits `security/`, `testing/`, `hardware/`. | Extend the ADR list (or point to `docs/decisions/` + CLAUDE.md's table); expand the `docs/` subtree. | + +### Design gaps from the recent ADRs + +| id | sev | location | finding | suggested fix | +|---|---|---|---|---| +| O4 | medium | `CLAUDE.md:106`, `docs/decisions/009-provisioning-handoff.md:78`, `scripts/tf_to_inventory.py:24` | ADR-016 says "`askari` is Ansible-managed — its own inventory group", but no group is named anywhere; host-groups list + valid-groups set don't include it. | Decide the group name (e.g. `edge_hosts`/`hetzner_hosts`), add to CLAUDE.md host groups + ADR-009 valid groups. (`askari` is manual like the control node, so `tf_to_inventory.py` need not generate it, but the group must be valid.) | +| O5 | medium | `docs/decisions/006-terraform.md:78` | `backend.tf` labelled "Forgejo state backend", contradicting ADR-006's own State-backend section (local state on `ubongo`; Forgejo's API is read-only). | Relabel to "local state backend (no remote backend)". | +| O6 | medium | `docs/decisions/014-knowledge-sourcing.md:88` | Plugin-reproducibility described as open ("tracked in `docs/TODO.md`"), but TODO 10.7 is marked DONE (settings.json declares the plugin set; claude-code-setup.md covers bootstrap). | Update to reflect the resolved state; drop the forward-pointer. | + +### Clarity / lower-priority consistency + +| id | sev | location | finding | suggested fix | +|---|---|---|---|---| +| O7 | low | `docs/decisions/011-update-management.md:128` | "Digest-pinning the stateful tier" sits in the ruled-out table, but Decision #2 *adopts* `tag@digest` for stateful (TODO 16 confirms). ADR-011 is still **Proposed/draft**. | Remove/replace the ruled-out row when accepting ADR-011 (TODO 16). | +| O8 | low | `docs/decisions/003-toolchain.md:85`, `docs/decisions/010-forgejo-ci.md:66` | "act_runner on the control node **or a dedicated runner VM**" reads ambiguously against ADR-015 (no cluster control VM). Not wrong (a runner VM is a separate option) but worth disambiguating. | Name `ubongo` as the runner host; cross-ref ADR-015; keep "dedicated runner VM" as an explicit future option. | +| O9 | low | `docs/decisions/008-testing.md:148` | The "WireGuard tunnel establishment" Molecule-exclusion row is framed for the retired OPNsense VLAN-99 WireGuard; NetBird still uses WireGuard (`wt0`) as its data plane. | Reframe the row to the NetBird `wt0` data-plane (ADR-016). | +| O10 | low | `docs/decisions/011-update-management.md:67` | Cross-references "the `scheduled_jobs` plan and ADR-010"; ADR-010 is Forgejo CI, not scheduled jobs (that's TODO 8.3, unbuilt). | Point to TODO 8.3 instead. | +| O11 | low | `docs/CAPABILITIES.md` §10 | No row for the `/verify-service` (Level 4) capability though ADR-017 decided it. | Add an Operations row for `/verify-service`. | +| O12 | low | `docs/TODO.md:30` (item 3.10) | Garbled text ("maybe something in the improvements of the methods in boma moods the point?") — unfollowable. | Rewrite the question clearly or strike it. | + +### Deterministic-scan noise (not fixed — known limitations) + +- **`broken-path-ref` ×14** — all illustrative/future paths: report-name templates + (`docs/testing/reviews/YYYY-MM-DD-.md`) and `latest.md` files not yet + created. The path-ref check stops at the `` boundary, so a templated + path registers as a partial broken ref. *Potential scanner improvement: skip a path + ref immediately followed by a placeholder char or a `YYYY-MM-DD` token.* +- **`marker` ×35** — mostly prose references to `TODO.md` items, not code markers. + Known noise; the regex already excludes `TODO.md`/alternations but not "TODO 8.2" + prose. +- **`open-deferred-item` ×6** — all confirmed genuinely open (see above). `0` + stale-deferred. New check healthy. + +## Diff vs prior run (2026-05-30) + +- **Recurring:** the Terraform-writes-DNS contradiction (AF1) — fixed in README/ADR-003 + last run, reappeared in ADR-005/new-host.md. Signal that this phrasing keeps being + copied; worth a `/review-repo`-time grep for "writes … DNS A record". +- **New:** everything else — the repo gained ADR-010…017 and the `ubongo`/NetBird/ + Level-4 work since the prior run, so most findings are fresh propagation gaps. +- **Resolved:** prior-run open items were largely addressed during the intervening + doc work (control-node-as-VM, WireGuard framing, etc., now mostly reconciled). + +## Follow-up prompt + +> Thread the ADR-017 `VERIFY.md` convention through the remaining docs (O1–O3): add a +> `VERIFY.md` row to ADR-004's service-role file table, a VERIFY.md step to +> `new-role.md` (and reconcile STATUS.md:17), and refresh `README.md`'s ADR list + +> `docs/` tree. Then settle the `askari` inventory group name (O4) and propagate it to +> CLAUDE.md host-groups + ADR-009 valid-groups. Finally clear the stale labels O5 +> (ADR-006 backend.tf) and O6 (ADR-014 plugin reproducibility = DONE). diff --git a/docs/reviews/latest.md b/docs/reviews/latest.md index 92124a2..61f43be 100644 --- a/docs/reviews/latest.md +++ b/docs/reviews/latest.md @@ -1,23 +1,93 @@ -# Latest repo review +# Repo review — 2026-06-05 -Most recent: **2026-05-30** → full report: `docs/reviews/2026-05-30-review.md` +- **Reviewed commit:** `f566fd1` (scan); auto-fixes landed in `666ad42` +- **Mode:** on-demand (interactive) +- **Scope:** whole repo — 2 roles, 17 ADRs, 4 runbooks, 7 scripts; doc-heavy +- **Prior run:** 2026-05-30 (`de38d1c`) — 7 auto-fixed, 17 open + +## Summary | | high | medium | low | total | |---|---|---|---|---| -| Auto-fixed | 2 | 3 | 2 | 7 | -| Open | 4 | 4 | 9 | 17 | +| Auto-fixed | 2 | 0 | 2 | 4 | +| Open (report-only) | 0 | 5 | 7 | 12 | -Dominant theme: drift from this session's own changes — residual `.vault_pass` -references after the Vaultwarden/rbw switch, and leftover PR/merge-request language -after going trunk-based. +This review followed a session of heavy documentation work (ADR-015 `ubongo`, +ADR-016 NetBird mesh, ADR-017 Level-4 verification). Most findings are **propagation +gaps** — a new decision landed but an older doc still reflects the prior design. -## Suggested follow-up prompt +**New deferral check exercised.** `repo-scan.py` now enumerates open ADR "Deferred/ +Open" items and flags any another file calls resolved-but-unmarked. This run: 6 +open-deferred-items surfaced, **all confirmed genuinely open** by the cross-cutting +reviewer (ADR-011 #1–5, ADR-015 #3), **0 stale-deferred**. The check produced no false +resolutions and the judgement layer agreed — working as designed. -> Remediate the boma 2026-05-30 review (`docs/reviews/2026-05-30-review.md`): -> 1. Purge the residual `.vault_pass` references R1–R5 → the rbw/Vaultwarden flow. -> 2. Decide the workflow model R6–R7 — I lean "keep deploy approval gates, drop the -> PR/merge-request framing"; reconcile ADR-003/008 and CLAUDE.md to match. -> 3. Resolve R8 — scaffold `base`/`docker_host` via `make new-role`, or correct -> STATUS.md/roles/README.md to say the roles don't exist yet. -> 4. Fix the Terraform `vlan_tag` wiring (R9). -> Report on the rest. +## Auto-fixes applied (`666ad42`) + +| id | dim | sev | location | fix | +|---|---|---|---|---| +| AF1 | consistency | high | `docs/decisions/005-bootstrapping.md:36`, `docs/runbooks/new-host.md:62,71` | Removed "Terraform writes the host's DNS A record" — contradicts ADR-009 (the `dns` role owns the zone). **Recurring**: the 2026-05-30 run fixed the same contradiction in README/ADR-003; it reappeared in two more files. | +| AF2 | consistency | high | `docs/decisions/005-bootstrapping.md:8` | Control node described as cloned from the cloud-init template; ADR-015 makes `ubongo` a physical box installed directly. Corrected. | +| AF3 | consistency | low | `CLAUDE.md:197` | Added the missing `docs/testing/service-verify-template.md` row to Further reading (parallels the security-template row). | +| AF4 | cruft | low | `docs/TODO.md:79` | Typos: "we we" → "we"; "seperate" → "separate". | + +## Open findings (report-only) + +### VERIFY.md propagation cluster (ADR-017 not fully threaded through) + +| id | sev | location | finding | suggested fix | +|---|---|---|---|---| +| O1 | medium | `docs/decisions/004-docker-model.md` (file table) | The service-role standard lists `SECURITY.md` but not `VERIFY.md`, though ADR-017 + CLAUDE.md:85 now mandate it. | Add a `VERIFY.md` row to ADR-004's file table. | +| O2 | medium | `docs/runbooks/new-role.md` (step 9 → Commit) | No step to write `VERIFY.md` for service roles (only `SECURITY.md`). Makes `STATUS.md:17` ("runbooks current and mutually reconciled") slightly overstated. | Add a "write the per-service verification spec" step mirroring the SECURITY.md step. | +| O3 | low | `README.md:58-60, 94` | ADR list stops at 001–009 (010–017 absent); the `docs/` tree omits `security/`, `testing/`, `hardware/`. | Extend the ADR list (or point to `docs/decisions/` + CLAUDE.md's table); expand the `docs/` subtree. | + +### Design gaps from the recent ADRs + +| id | sev | location | finding | suggested fix | +|---|---|---|---|---| +| O4 | medium | `CLAUDE.md:106`, `docs/decisions/009-provisioning-handoff.md:78`, `scripts/tf_to_inventory.py:24` | ADR-016 says "`askari` is Ansible-managed — its own inventory group", but no group is named anywhere; host-groups list + valid-groups set don't include it. | Decide the group name (e.g. `edge_hosts`/`hetzner_hosts`), add to CLAUDE.md host groups + ADR-009 valid groups. (`askari` is manual like the control node, so `tf_to_inventory.py` need not generate it, but the group must be valid.) | +| O5 | medium | `docs/decisions/006-terraform.md:78` | `backend.tf` labelled "Forgejo state backend", contradicting ADR-006's own State-backend section (local state on `ubongo`; Forgejo's API is read-only). | Relabel to "local state backend (no remote backend)". | +| O6 | medium | `docs/decisions/014-knowledge-sourcing.md:88` | Plugin-reproducibility described as open ("tracked in `docs/TODO.md`"), but TODO 10.7 is marked DONE (settings.json declares the plugin set; claude-code-setup.md covers bootstrap). | Update to reflect the resolved state; drop the forward-pointer. | + +### Clarity / lower-priority consistency + +| id | sev | location | finding | suggested fix | +|---|---|---|---|---| +| O7 | low | `docs/decisions/011-update-management.md:128` | "Digest-pinning the stateful tier" sits in the ruled-out table, but Decision #2 *adopts* `tag@digest` for stateful (TODO 16 confirms). ADR-011 is still **Proposed/draft**. | Remove/replace the ruled-out row when accepting ADR-011 (TODO 16). | +| O8 | low | `docs/decisions/003-toolchain.md:85`, `docs/decisions/010-forgejo-ci.md:66` | "act_runner on the control node **or a dedicated runner VM**" reads ambiguously against ADR-015 (no cluster control VM). Not wrong (a runner VM is a separate option) but worth disambiguating. | Name `ubongo` as the runner host; cross-ref ADR-015; keep "dedicated runner VM" as an explicit future option. | +| O9 | low | `docs/decisions/008-testing.md:148` | The "WireGuard tunnel establishment" Molecule-exclusion row is framed for the retired OPNsense VLAN-99 WireGuard; NetBird still uses WireGuard (`wt0`) as its data plane. | Reframe the row to the NetBird `wt0` data-plane (ADR-016). | +| O10 | low | `docs/decisions/011-update-management.md:67` | Cross-references "the `scheduled_jobs` plan and ADR-010"; ADR-010 is Forgejo CI, not scheduled jobs (that's TODO 8.3, unbuilt). | Point to TODO 8.3 instead. | +| O11 | low | `docs/CAPABILITIES.md` §10 | No row for the `/verify-service` (Level 4) capability though ADR-017 decided it. | Add an Operations row for `/verify-service`. | +| O12 | low | `docs/TODO.md:30` (item 3.10) | Garbled text ("maybe something in the improvements of the methods in boma moods the point?") — unfollowable. | Rewrite the question clearly or strike it. | + +### Deterministic-scan noise (not fixed — known limitations) + +- **`broken-path-ref` ×14** — all illustrative/future paths: report-name templates + (`docs/testing/reviews/YYYY-MM-DD-.md`) and `latest.md` files not yet + created. The path-ref check stops at the `` boundary, so a templated + path registers as a partial broken ref. *Potential scanner improvement: skip a path + ref immediately followed by a placeholder char or a `YYYY-MM-DD` token.* +- **`marker` ×35** — mostly prose references to `TODO.md` items, not code markers. + Known noise; the regex already excludes `TODO.md`/alternations but not "TODO 8.2" + prose. +- **`open-deferred-item` ×6** — all confirmed genuinely open (see above). `0` + stale-deferred. New check healthy. + +## Diff vs prior run (2026-05-30) + +- **Recurring:** the Terraform-writes-DNS contradiction (AF1) — fixed in README/ADR-003 + last run, reappeared in ADR-005/new-host.md. Signal that this phrasing keeps being + copied; worth a `/review-repo`-time grep for "writes … DNS A record". +- **New:** everything else — the repo gained ADR-010…017 and the `ubongo`/NetBird/ + Level-4 work since the prior run, so most findings are fresh propagation gaps. +- **Resolved:** prior-run open items were largely addressed during the intervening + doc work (control-node-as-VM, WireGuard framing, etc., now mostly reconciled). + +## Follow-up prompt + +> Thread the ADR-017 `VERIFY.md` convention through the remaining docs (O1–O3): add a +> `VERIFY.md` row to ADR-004's service-role file table, a VERIFY.md step to +> `new-role.md` (and reconcile STATUS.md:17), and refresh `README.md`'s ADR list + +> `docs/` tree. Then settle the `askari` inventory group name (O4) and propagate it to +> CLAUDE.md host-groups + ADR-009 valid-groups. Finally clear the stale labels O5 +> (ADR-006 backend.tf) and O6 (ADR-014 plugin reproducibility = DONE).