2026-05-30 18:56:01 +02:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
"""
|
|
|
|
|
Phase-0 deterministic pre-scan for the /review-repo command.
|
|
|
|
|
|
|
|
|
|
Python standard library only. Emits a JSON object to stdout:
|
|
|
|
|
- inventory: roles, adrs, runbooks, playbooks, scripts (the shard list)
|
|
|
|
|
- findings: exact, no-judgement issues (markers, broken refs, unencrypted vaults)
|
|
|
|
|
|
|
|
|
|
The *judgement* review — contradictions, design-conformance, stale intent — is done
|
|
|
|
|
by the /review-repo fan-out reviewers, NOT here. This script only catches the cheap,
|
|
|
|
|
exact things so the reviewers can focus on what needs reasoning.
|
|
|
|
|
|
|
|
|
|
Usage: python3 scripts/repo-scan.py [repo_root] > scan.json
|
|
|
|
|
"""
|
|
|
|
|
import json
|
|
|
|
|
import os
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
|
|
|
|
|
|
|
|
|
# Repository root to scan: the first CLI argument when given, else the current directory.
ROOT = os.path.abspath(sys.argv[1] if len(sys.argv) > 1 else ".")

# Directory names that are pruned from the walk.
PRUNE = {
    ".git", ".venv", ".collections", ".ansible", ".worktrees",
    ".pytest_cache", "node_modules", "__pycache__",
}
SKIP_PREFIX = os.path.join("docs", "reviews")  # don't scan our own reports
# Extensions of the files whose lines are scanned.
SOURCE_EXTS = {".yml", ".yaml", ".j2", ".py", ".sh", ".md", ".tf", ".cfg", ".ini"}

# Code-marker words. Deliberately NOT matched when the word is:
#   - a member of a regex alternation such as `(TODO|...)`;
#   - part of a filename such as `TODO.md` / `docs/TODO.md`;
#   - a numbered backlog reference such as `TODO 8.2`, `TODO item 16`, `TODO #3`
#     (those point at the backlog; they are not code markers).
MARKER_RE = re.compile(
    r"(?<![(|/])\b(TODO|FIXME|XXX|HACK)\b(?![|)]|\.\w|[\s\-]*\(?\s*(?:item\s+)?#?\d)")
# "ADR-NNN" with exactly three digits; group 1 is the number.
ADR_REF_RE = re.compile(r"\bADR-(\d{3})\b")
# A path rooted at one of the known top-level directories.
PATH_REF_RE = re.compile(r"(?:docs|scripts|roles|inventories|terraform|playbooks)/[\w./-]+")
# Characters that mark a path as a template/glob rather than a concrete file.
PLACEHOLDER = set("<>*${}")

# Stale-deferred detection: an ADR "Deferred/Open" entry that some other file
# describes as resolved while the entry itself is not marked resolved in place.
# (See docs/FRICTION.md.)
RESOLVE_MARK_RE = re.compile(r"\b(?:RESOLVED|DECIDED)\b", re.I)
# Markdown list item: group 1 is the marker ("3." or a bullet), group 2 the text.
LIST_ITEM_RE = re.compile(r"^\s*(\d+\.|[-*+])\s+(.*)")
# An external "this resolves ADR-NNN deferred #K" style reference.
DEFER_REF_RE = re.compile(r"ADR-(\d{3})\D{0,40}?deferred\D{0,12}?(\d+)", re.I)
RESOLVE_WORD_RE = re.compile(r"\b(?:resolv\w*|decid\w*|address\w*|complet\w*|done)\b", re.I)

# Rename-incomplete detection: an ADR announces a rename/supersession of a named
# term (Old → New); the OLD name should then no longer linger in the design docs.
# (The structural cousin of stale-deferred — see docs/FRICTION.md, ADR-024.)
# A "specific" name is a backticked token or a capitalised proper-noun/identifier;
# common connective words are rejected so they can't be mistaken for a tool name.
_NAME = r"(?:`[^`]+`|[A-Z][A-Za-z0-9_+.-]{2,})"
RENAME_STOPWORDS = {
    "was", "were", "the", "this", "that", "with", "from", "into", "and", "but",
    "for", "are", "has", "had", "been", "now", "not", "all", "any", "use", "used",
    "via", "per", "its", "our", "one", "two", "old", "new", "phase", "step",
    "adr", "read", "name", "term", "tool", "prose", "roadmap",
}
# Trigger forms — each captures (old, new) as raw name tokens; the connective words
# are case-insensitive but the names must still satisfy _NAME (specific tokens).
# NOTE(review): re.I applies to the whole pattern, so in the first four forms the
# `[A-Z]` inside _NAME also accepts lower-case letters; only the arrow form, which
# carries no flag, is case-sensitive on the names. Confirm this is intended.
RENAME_ASSERT_RES = (
    # renamed X to Y
    re.compile(rf"renamed\s+(?:from\s+)?({_NAME})\s+to\s+({_NAME})", re.I),
    # replaced X with Y
    re.compile(rf"replac\w*\s+({_NAME})\s+with\s+({_NAME})", re.I),
    # superseded X with/by Y
    re.compile(rf"supersed\w*\s+({_NAME})\s+(?:with|by)\s+({_NAME})", re.I),
    # X ... (is/are/was/were/been) updated to read Y
    re.compile(rf"({_NAME})\b.{{0,40}}?\b(?:is|are|was|were|been)?\s*"
               rf"updated\s+to\s+read\s+[\"']?({_NAME})", re.I),
    # X → Y / X -> Y on a line that also carries a rename/supersede/update cue
    re.compile(rf"({_NAME})\s*(?:->|→)\s*({_NAME})"),
)
RENAME_ARROW_RES = (RENAME_ASSERT_RES[-1],)  # arrow forms need a cue word on the line
RENAME_CUE_RE = re.compile(r"\b(?:renam\w*|replac\w*|supersed\w*|updated|rename)\b", re.I)
# Historical / negation cues — a lingering OLD name on such a line is legitimate
# history, not a missed ripple edit, so it is skipped.
RENAME_HIST_RE = re.compile(
    r"\b(?:was|were|formerly|previously|no longer|instead of|rather than|reject\w*|"
    r"reconsider\w*|supersed\w*|deprecat\w*|legacy|history|heritage|V4|"
    r"actually ran|used to)\b", re.I)

# ADR-structure check (ADR-023): numbered ADRs must carry the four mandatory
# sections and a parseable Status line. Presence only — section ORDER is a
# template-demonstrated convention, not machine-enforced.
ADR_FILE_RE = re.compile(r"^\d{3}-.*\.md$")
ADR_REQUIRED_SECTIONS = ("Status", "Context", "Decision", "Consequences")
ADR_STATUS_LINE_RE = re.compile(
    r"^(Proposed \(\d{4}-\d{2}-\d{2}\)"
    r"|Accepted \(\d{4}-\d{2}-\d{2}\)"
    r"|Superseded by ADR-\d{3} \(\d{4}-\d{2}-\d{2}\)"
    r"|Deprecated \(\d{4}-\d{2}-\d{2}\))")
|
|
|
|
|
|
2026-06-05 18:13:49 +02:00
|
|
|
|
|
|
|
|
def _is_defer_heading(text):
|
|
|
|
|
t = text.strip().lower()
|
|
|
|
|
return (t.startswith("deferred") or t.startswith("unresolved")
|
|
|
|
|
or "open question" in t or "open issue" in t)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _defer_subject(item_text):
|
|
|
|
|
m = re.search(r"\*\*(.+?)\*\*", item_text)
|
|
|
|
|
s = m.group(1) if m else re.split(r"\s+[—–-]\s+|:", item_text, maxsplit=1)[0]
|
|
|
|
|
return re.sub(r"\s+", " ", s).strip(" *_`~—–-:.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def deferred_findings(adr_files, defer_refs):
    """Report open "Deferred/Open" list items in ADR files, and stale ones among them.

    Args:
        adr_files: {rel_path: [lines]} for docs/decisions/*.md.
        defer_refs: [(adr, ordinal, path, line, has_resolve_word)] gathered
            repo-wide, where ``adr`` is the three-digit ADR number as a string
            and ``ordinal`` is an int.

    Returns:
        A list of finding dicts. Each open entry under a deferred/open heading
        gets one informational ``open-deferred-item``. A ``stale-deferred``
        contradiction follows it for every reference in another file that
        names that entry together with a resolve word.
    """
    # Index the references that claim a resolution once, keyed by (adr, ordinal),
    # so each list item is a dictionary lookup instead of a rescan of defer_refs.
    # Insertion order is kept, so findings come out in the order refs were given.
    resolved_by = {}
    for ref_adr, ref_ord, ref_path, ref_line, has_res in defer_refs:
        if has_res:
            resolved_by.setdefault((ref_adr, ref_ord), []).append((ref_path, ref_line))

    out = []
    for rpath, lines in sorted(adr_files.items()):
        base = os.path.basename(rpath)
        madr = re.match(r"(\d{3})-", base)
        adr_num = madr.group(1) if madr else None
        # Non-numbered files (a template, a README) have no ADR number; name them
        # by file rather than printing "ADR-None" in the finding text.
        label = f"ADR-{adr_num}" if adr_num else base
        in_defer = False
        for i, raw in enumerate(lines, 1):
            hm = re.match(r"#{1,6}\s+(.*)", raw)
            if hm:
                # Every heading opens or closes the deferred section.
                in_defer = _is_defer_heading(hm.group(1))
                continue
            if not in_defer:
                continue
            im = LIST_ITEM_RE.match(raw)
            if not im:
                continue
            marker, item_text = im.group(1), im.group(2)
            # self-marked resolved (inline RESOLVED/DECIDED or ~~strikethrough~~) → fine
            if RESOLVE_MARK_RE.search(raw) or item_text.lstrip().startswith("~~"):
                continue
            # "12." -> 12; bullet markers ("-", "*", "+") carry no ordinal.
            number = marker[:-1]
            ordinal = int(number) if number.isdigit() else None
            subject = _defer_subject(item_text)
            # Compare against None so that an item numbered "0." keeps its ordinal.
            tag = f" #{ordinal}" if ordinal is not None else ""
            out.append({"check": "open-deferred-item", "severity": "low", "path": rpath,
                        "line": i, "detail": f"open deferred item{tag} in {label}: "
                        f"'{subject[:80]}' — confirm not resolved by a later ADR/STATUS"})
            if adr_num and ordinal is not None:
                for rp, rl in resolved_by.get((adr_num, ordinal), ()):
                    if rp == rpath:  # the ADR's own mention is not a contradiction
                        continue
                    out.append({"check": "stale-deferred", "severity": "medium",
                                "path": rpath, "line": i,
                                "detail": f"ADR-{adr_num} deferred #{ordinal} "
                                f"('{subject[:60]}') is described as resolved at "
                                f"{rp}:{rl}, but is not marked RESOLVED in place"})
    return out
|
|
|
|
|
|
2026-05-30 18:56:01 +02:00
|
|
|
|
2026-06-10 13:57:42 +02:00
|
|
|
def adr_structure_findings(adr_files):
    """Check numbered ADRs for the mandatory sections and a parseable Status line.

    adr_files: {rel_path: [lines]} for docs/decisions/*.md.

    Only files named NNN-*.md are inspected; others (e.g. adr-template.md) are
    skipped. One finding lists any missing mandatory ``##`` sections. A second
    is emitted when the first non-blank line under ``## Status`` does not start
    with a recognised lifecycle form. Section order is NOT checked (ADR-023).
    """
    findings = []
    for rpath in sorted(adr_files):
        if ADR_FILE_RE.match(os.path.basename(rpath)) is None:
            continue
        lines = adr_files[rpath]

        # First word of every level-2 heading -> 0-based index of its first occurrence.
        first_seen = {}
        for idx, text in enumerate(lines):
            hit = re.match(r"^##\s+(\w+)", text)
            if hit and hit.group(1) not in first_seen:
                first_seen[hit.group(1)] = idx

        absent = [name for name in ADR_REQUIRED_SECTIONS if name not in first_seen]
        if absent:
            findings.append({"check": "adr-structure", "severity": "medium",
                             "path": rpath, "line": 1,
                             "detail": f"missing mandatory section(s): {', '.join(absent)}"})

        status_idx = first_seen.get("Status")
        if status_idx is None:
            continue

        # First non-blank line of the Status body, which ends at the next "## " heading.
        status_text = ""
        for text in lines[status_idx + 1:]:
            if text.startswith("## "):
                break
            stripped = text.strip()
            if stripped:
                status_text = stripped
                break

        if ADR_STATUS_LINE_RE.match(status_text):
            continue
        findings.append({"check": "adr-structure", "severity": "medium",
                         "path": rpath, "line": status_idx + 1,
                         "detail": "Status not parseable (want 'Proposed (YYYY-MM-DD)', "
                                   "'Accepted (YYYY-MM-DD)', 'Superseded by ADR-NNN "
                                   "(YYYY-MM-DD)', or 'Deprecated (YYYY-MM-DD)'); "
                                   f"got: {status_text[:60]!r}"})
    return findings
|
|
|
|
|
|
|
|
|
|
|
2026-06-17 17:49:41 +02:00
|
|
|
def _clean_name(tok):
    """Normalise a captured name token; return the bare name or None.

    Surrounding whitespace, backticks and quotes are removed, then trailing
    sentence punctuation. None is returned when what remains is empty, contains
    a space, is a stopword, is an ADR reference, or is not a run of at least
    three identifier-like characters.
    """
    name = tok.strip().strip("`\"'").strip()
    name = name.rstrip(".,;:!?)")  # trailing sentence punctuation is not part of the name
    unusable = (
        not name
        or " " in name
        or name.lower() in RENAME_STOPWORDS
        # An ADR reference (ADR-017) is a document pointer, never the renamed *term*:
        # "the ADR-017 prose ... is updated to read Caddy" must not yield ADR-017 as
        # the old name. Reject it so such lines skip (precision >> recall).
        or re.fullmatch(r"ADR-\d{3}", name) is not None
        # Shape re-check after stripping: identifier-like characters only.
        or re.fullmatch(r"[A-Za-z0-9_+.-]{3,}", name) is None
    )
    return None if unusable else name
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _rename_assertion(line):
    """Look for a tight Old→New rename assertion on one ADR line.

    The trigger patterns are tried in their declared order. The first one whose
    two captures both clean to distinct specific names wins and is returned as
    ``(old, new)``. Returns None when no pattern qualifies. Conservative by
    design: precision >> recall.
    """
    for pattern in RENAME_ASSERT_RES:
        hit = pattern.search(line)
        if hit is None:
            continue
        # Arrow form only counts when the line also carries a rename/supersede cue.
        if pattern in RENAME_ARROW_RES and RENAME_CUE_RE.search(line) is None:
            continue
        old_name = _clean_name(hit.group(1))
        new_name = _clean_name(hit.group(2))
        if old_name and new_name and old_name != new_name:
            return old_name, new_name
    return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rename_incomplete_findings(adr_files, extra_docs):
    """Flag design-doc lines that still use a name an ADR says was renamed.

    Args:
        adr_files: {rel_path: [lines]} for docs/decisions/*.md. Only the numbered
            ADRs (NNN-*.md) make assertions; every file is searched.
        extra_docs: {rel_path: [lines]} for CAPABILITIES.md / ROADMAP.md, which
            are searched as well.

    Returns:
        A list of ``rename-incomplete`` finding dicts. There is one per line
        where the old name still appears as a whole token, excluding the
        announcing ADR itself, lines that also contain the new name, and lines
        carrying a historical/negation cue.
    """
    out = []
    # The design-doc set we search: all decisions/*.md plus the two extra docs.
    doc_set = dict(adr_files)
    doc_set.update(extra_docs)
    docs = sorted(doc_set.items())  # loop-invariant, so sort once

    # Collect assertions only from numbered ADRs (NNN-*.md). An ADR that states
    # the same rename on several lines contributes it once; otherwise every
    # lingering line would be reported once per repetition.
    assertions = []  # (adr_num, announcer_path, old, new)
    seen = set()
    for rpath, lines in sorted(adr_files.items()):
        base = os.path.basename(rpath)
        if not ADR_FILE_RE.match(base):
            continue
        adr_num = base[:3]
        for line in lines:
            parsed = _rename_assertion(line)
            if not parsed:
                continue
            key = (rpath, parsed[0], parsed[1])
            if key in seen:
                continue
            seen.add(key)
            assertions.append((adr_num, rpath, parsed[0], parsed[1]))

    for adr_num, announcer, old, new in assertions:
        # Case-sensitive whole-token match. Lookarounds are used instead of \b
        # because a name may begin or end with a non-word character (".env",
        # "C++"), where \b would demand an adjacent word character and so miss
        # free-standing uses. For names with word characters at both ends the
        # two forms match exactly the same text.
        old_re = re.compile(r"(?<!\w)" + re.escape(old) + r"(?!\w)")
        for rpath, lines in docs:
            if rpath == announcer:  # the ADR that made the claim is exempt
                continue
            for i, raw in enumerate(lines, 1):
                if not old_re.search(raw):
                    continue
                if new in raw:  # rename is being explained on this line
                    continue
                if RENAME_HIST_RE.search(raw):  # legitimate history / negation
                    continue
                out.append({"check": "rename-incomplete", "severity": "medium",
                            "path": rpath, "line": i,
                            "detail": f"ADR-{adr_num} announced rename '{old}' -> "
                            f"'{new}' but '{old}' still appears here; confirm the "
                            "ripple edit landed or soften the ADR claim"})
    return out
|
|
|
|
|
|
|
|
|
|
|
2026-05-30 18:56:01 +02:00
|
|
|
def walk_files():
    """Yield the joined path of every file below ROOT, skipping PRUNE directories."""
    for folder, subdirs, files in os.walk(ROOT):
        # Trim the directory list in place so os.walk does not descend into pruned names.
        subdirs[:] = [name for name in subdirs if name not in PRUNE]
        yield from (os.path.join(folder, fname) for fname in files)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def rel(path):
    """Return *path* expressed relative to ROOT."""
    return os.path.relpath(path, start=ROOT)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def inventory():
    """Build the shard list: sorted entry names per well-known repo directory.

    Returns a dict with the keys roles, adrs, runbooks, playbooks and scripts.
    A directory that does not exist contributes an empty list.
    """
    def listdir(*parts, want_dirs=False, suffixes=None):
        """Sorted entry names of ROOT/<parts>, optionally dirs only and/or by suffix."""
        base = os.path.join(ROOT, *parts)
        if not os.path.isdir(base):
            return []
        return [
            entry
            for entry in sorted(os.listdir(base))
            if (not want_dirs or os.path.isdir(os.path.join(base, entry)))
            and (not suffixes or entry.endswith(suffixes))
        ]

    shards = {}
    shards["roles"] = listdir("roles", want_dirs=True)
    shards["adrs"] = listdir("docs", "decisions", suffixes=(".md",))
    shards["runbooks"] = listdir("docs", "runbooks", suffixes=(".md",))
    shards["playbooks"] = listdir("playbooks", suffixes=(".yml", ".yaml"))
    shards["scripts"] = listdir("scripts")
    return shards
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def adr_numbers():
    """Return the set of three-digit ADR numbers (as strings) in docs/decisions.

    A number is taken from every entry whose name starts with ``NNN-``. The set
    is empty when the directory does not exist.
    """
    decisions = os.path.join(ROOT, "docs", "decisions")
    if not os.path.isdir(decisions):
        return set()
    prefixes = (re.match(r"(\d{3})-", entry) for entry in os.listdir(decisions))
    return {hit.group(1) for hit in prefixes if hit}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _path_is_under(rpath, prefix):
    """True when relative path *rpath* is *prefix* itself or lies beneath it.

    A bare ``startswith(prefix)`` would also accept siblings that merely share
    the prefix text (``docs/decisions-old/...`` for ``docs/decisions``).
    """
    return rpath == prefix or rpath.startswith(prefix + os.sep)


def _broken_path_refs(line):
    """Yield each repo-path reference on *line* that does not exist under ROOT.

    Only candidates inside backticks or a markdown link target are considered —
    bare prose ("roles/docs") is not a real path.
    """
    candidates = re.findall(r"`([^`]+)`", line) + re.findall(r"\]\(([^)]+)\)", line)
    for cand in candidates:
        if "://" in cand:  # skip URLs
            continue
        pm = PATH_REF_RE.search(cand)
        if not pm:
            continue
        ref = pm.group(0).rstrip(".,);:`'\"")
        if any(c in ref for c in PLACEHOLDER):
            continue
        # Skip template / generated-report paths — not real broken refs:
        #  - a placeholder (<service>, ${x}) immediately follows the matched path
        #    (the regex stops at the placeholder, so it isn't caught above)
        #  - a date-template token (YYYY-MM-DD) appears in the path
        #  - the path is under a generated-report `reviews/` directory
        if (cand[pm.end():pm.end() + 1] in PLACEHOLDER
                or re.search(r"YYYY|MM-DD", ref)
                or re.search(r"(?:^|/)reviews/", ref)):
            continue
        if not os.path.exists(os.path.join(ROOT, ref)):
            yield ref


def scan():
    """Walk the repository and return the list of deterministic findings.

    Per-file checks: unencrypted ``vault.yml``, marker words, references to ADR
    numbers with no file, and references to repo paths that do not exist. While
    walking, ADR files, the two extra design docs and "ADR-NNN deferred #K"
    references are collected, then passed to the cross-file checks
    (stale-deferred, ADR structure, rename-incomplete), whose findings are
    appended last. Files that cannot be opened are skipped.
    """
    findings = []
    adrs = adr_numbers()
    adr_files = {}   # docs/decisions/*.md → lines, for deferred-section parsing
    extra_docs = {}  # CAPABILITIES.md / ROADMAP.md → lines, for rename-incomplete
    defer_refs = []  # repo-wide "resolves ADR-NNN deferred #K" references
    decisions_dir = os.path.join("docs", "decisions")
    rename_extra = {os.path.join("docs", "CAPABILITIES.md"),
                    os.path.join("docs", "ROADMAP.md")}

    for path in walk_files():
        rpath = rel(path)
        if _path_is_under(rpath, SKIP_PREFIX):
            continue
        name = os.path.basename(path)

        if name == "vault.yml":
            try:
                # Context manager: close the handle promptly instead of leaving it to GC.
                with open(path, encoding="utf-8", errors="replace") as fh:
                    text = fh.read()
            except OSError:
                continue
            if not text.startswith("$ANSIBLE_VAULT"):
                # Only comments, blank lines and "---" → nothing secret to leak.
                real = [ln for ln in text.splitlines()
                        if ln.strip() and not ln.lstrip().startswith("#") and ln.strip() != "---"]
                if real:
                    findings.append({"check": "vault-unencrypted", "severity": "high",
                                     "path": rpath, "line": 1,
                                     "detail": "vault.yml is not ansible-vault encrypted but has content"})
            continue

        if os.path.splitext(path)[1] not in SOURCE_EXTS:
            continue
        try:
            with open(path, encoding="utf-8", errors="replace") as fh:
                lines = fh.readlines()
        except OSError:
            continue

        if _path_is_under(rpath, decisions_dir) and rpath.endswith(".md"):
            adr_files[rpath] = lines
        if rpath in rename_extra:
            extra_docs[rpath] = lines

        for i, line in enumerate(lines, 1):
            for m in DEFER_REF_RE.finditer(line):
                defer_refs.append((m.group(1), int(m.group(2)), rpath, i,
                                   bool(RESOLVE_WORD_RE.search(line))))
            markers = sorted(set(m.group(1) for m in MARKER_RE.finditer(line)))
            if markers:
                findings.append({"check": "marker", "severity": "low", "path": rpath,
                                 "line": i, "detail": f"{'/'.join(markers)}: {line.strip()[:120]}"})
            for m in ADR_REF_RE.finditer(line):
                if m.group(1) not in adrs:
                    findings.append({"check": "broken-adr-ref", "severity": "medium", "path": rpath,
                                     "line": i, "detail": f"references ADR-{m.group(1)} (no such file)"})
            for ref in _broken_path_refs(line):
                findings.append({"check": "broken-path-ref", "severity": "medium", "path": rpath,
                                 "line": i, "detail": f"references '{ref}' which does not exist"})

    findings.extend(deferred_findings(adr_files, defer_refs))
    findings.extend(adr_structure_findings(adr_files))
    findings.extend(rename_incomplete_findings(adr_files, extra_docs))
    return findings
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the scan, write the JSON report to stdout and a one-line tally to stderr."""
    report = {"root": ROOT, "inventory": inventory(), "findings": scan()}
    json.dump(report, sys.stdout, indent=2)
    sys.stdout.write("\n")

    # Number of findings per check name, for the human-readable summary.
    tally = {}
    for finding in report["findings"]:
        check = finding["check"]
        tally[check] = tally.get(check, 0) + 1

    if tally:
        summary = ", ".join(f"{check}={count}" for check, count in sorted(tally.items()))
    else:
        summary = "no deterministic findings"

    shards = report["inventory"]
    print(f"repo-scan: {len(shards['roles'])} roles, "
          f"{len(shards['adrs'])} ADRs; {summary}", file=sys.stderr)


if __name__ == "__main__":
    main()
|