From b0c0150db280987c9b8a206723fa5c15c24f14cb Mon Sep 17 00:00:00 2001 From: sjat Date: Wed, 17 Jun 2026 17:49:41 +0200 Subject: [PATCH] feat(scan): repo-scan rename-incomplete check (kaizen) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a numbered ADR announces a rename Old->New, flag design-doc lines where Old still appears in present tense — skipping the announcing ADR, lines that also name New, and historical/negation cues, and rejecting ADR-NNN tokens as terms. Structural cousin of stale-deferred; run by /review-repo. Zero findings on the current tree (the Traefik->Caddy ripple edits have landed). Consumes the 2026-06-14 KEEP-OPEN signal in docs/FRICTION.md. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/repo-scan.py | 120 ++++++++++++++++++++++++++++++++++++++++ tests/test_repo_scan.py | 96 ++++++++++++++++++++++++++++++++ 2 files changed, 216 insertions(+) diff --git a/scripts/repo-scan.py b/scripts/repo-scan.py index 95ca228..78f1e13 100644 --- a/scripts/repo-scan.py +++ b/scripts/repo-scan.py @@ -41,6 +41,42 @@ LIST_ITEM_RE = re.compile(r"^\s*(\d+\.|[-*+])\s+(.*)") DEFER_REF_RE = re.compile(r"ADR-(\d{3})\D{0,40}?deferred\D{0,12}?(\d+)", re.I) RESOLVE_WORD_RE = re.compile(r"\b(?:resolv\w*|decid\w*|address\w*|complet\w*|done)\b", re.I) +# Rename-incomplete detection: an ADR announces a rename/supersession of a named +# term (Old → New); verify the OLD name no longer lingers in the design-doc set. +# (The structural cousin of stale-deferred — see docs/FRICTION.md, ADR-024.) +# A "specific" name is a backticked token or a capitalised proper-noun/identifier; +# common connective words are rejected so they can't be mistaken for a tool name. 
# NOTE: _NAME is deliberately case-SENSITIVE. The trigger patterns below make
# only their connective words case-insensitive, via scoped ``(?i:...)`` groups.
# A pattern-wide ``re.I`` would also apply to ``[A-Z]`` here, so any lowercase
# word of three or more characters would pass as a "capitalised" name.
_NAME = r"(?:`[^`]+`|[A-Z][A-Za-z0-9_+.-]{2,})"

# Lower-cased words that may be captured by _NAME (e.g. at the start of a
# sentence) but are never the subject of a rename.
RENAME_STOPWORDS = {
    "was", "were", "the", "this", "that", "with", "from", "into", "and", "but",
    "for", "are", "has", "had", "been", "now", "not", "all", "any", "use", "used",
    "via", "per", "its", "our", "one", "two", "old", "new", "phase", "step",
    "adr", "read", "name", "term", "tool", "prose", "roadmap",
}

# Trigger forms — each captures (old, new) as raw name tokens; the connective words
# are case-insensitive but the names must still satisfy _NAME (specific tokens).
RENAME_ASSERT_RES = (
    # renamed X to Y / renamed from X to Y
    re.compile(rf"(?i:renamed)\s+(?:(?i:from)\s+)?({_NAME})\s+(?i:to)\s+({_NAME})"),
    # replaced X with Y
    re.compile(rf"(?i:replac\w*)\s+({_NAME})\s+(?i:with)\s+({_NAME})"),
    # superseded X with/by Y
    re.compile(rf"(?i:supersed\w*)\s+({_NAME})\s+(?i:with|by)\s+({_NAME})"),
    # X ... (is/are/was/were/been) updated to read Y
    re.compile(rf"({_NAME})\b.{{0,40}}?\b(?i:is|are|was|were|been)?\s*"
               rf"(?i:updated\s+to\s+read)\s+[\"']?({_NAME})"),
    # X → Y / X -> Y on a line that also carries a rename/supersede/update cue
    re.compile(rf"({_NAME})\s*(?:->|→)\s*({_NAME})"),
)
RENAME_ARROW_RES = (RENAME_ASSERT_RES[-1],)  # arrow forms need a cue word on the line
# ``renam\w*`` already covers "rename", "renamed", "renaming", ...
RENAME_CUE_RE = re.compile(r"\b(?:renam\w*|replac\w*|supersed\w*|updated)\b", re.I)
# Historical / negation cues — a lingering OLD name on such a line is legitimate
# history, not a missed ripple edit, so it is skipped.
RENAME_HIST_RE = re.compile(
    r"\b(?:was|were|formerly|previously|no longer|instead of|rather than|reject\w*|"
    r"reconsider\w*|supersed\w*|deprecat\w*|legacy|history|heritage|V4|"
    r"actually ran|used to)\b", re.I)

# ADR-structure check (ADR-023): numbered ADRs must carry the four mandatory
# sections and a parseable Status line. Presence only — section ORDER is a
# template-demonstrated convention, not machine-enforced.
def _clean_name(tok):
    """Normalise a captured name token.

    Strips surrounding whitespace, backticks and quotes, then trailing sentence
    punctuation. Returns the bare name, or None when the token is not a
    'specific' name: empty, multi-word, a stopword, an ADR pointer, or not made
    of at least three identifier-like characters.
    """
    s = tok.strip().strip("`\"'").strip()
    s = s.rstrip(".,;:!?)")  # trailing sentence punctuation is not part of the name
    if not s or " " in s:
        return None
    if s.lower() in RENAME_STOPWORDS:
        return None
    # An ADR reference (ADR-017) is a document pointer, never the renamed *term* — a
    # sentence like "the ADR-017 prose ... is updated to read Caddy" must not parse
    # ADR-017 as the old name. Matched case-insensitively so a backticked or
    # lower-case `adr-017` is rejected as well (precision >> recall).
    if re.fullmatch(r"ADR-\d{3}", s, re.I):
        return None
    # After-stripping re-check of the identifier shape.
    if not re.fullmatch(r"[A-Za-z0-9_+.-]{3,}", s):
        return None
    return s


def _rename_assertion(line):
    """Parse a single ADR line for a tight Old→New rename assertion.

    Tries each pattern in RENAME_ASSERT_RES in order and returns the first
    (old, new) pair whose two names both survive _clean_name and differ from
    each other; returns None otherwise. Conservative: precision >> recall.
    """
    for rx in RENAME_ASSERT_RES:
        m = rx.search(line)
        if not m:
            continue
        # Arrow form only counts when the line also carries a rename/supersede cue.
        if rx in RENAME_ARROW_RES and not RENAME_CUE_RE.search(line):
            continue
        old, new = _clean_name(m.group(1)), _clean_name(m.group(2))
        if old and new and old != new:
            return old, new
    return None


def rename_incomplete_findings(adr_files, extra_docs):
    """Flag design-doc lines where a renamed term's OLD name still lingers.

    adr_files: {rel_path: [lines]} for docs/decisions/*.md (the numbered ADRs make
    the assertions). extra_docs: {rel_path: [lines]} for CAPABILITIES.md / ROADMAP.md.

    When a numbered ADR announces a rename 'Old' -> 'New', every line of the
    design-doc set (adr_files plus extra_docs) on which 'Old' appears as a whole
    token is reported, except:
      * lines in the announcing ADR itself,
      * lines that also contain 'New' (the rename is being explained there),
      * lines carrying a historical/negation cue (RENAME_HIST_RE).

    Returns a list of finding dicts with keys check/severity/path/line/detail;
    'line' is 1-based.
    """
    out = []
    # The design-doc set we search: all decisions/*.md plus the two extra docs.
    doc_set = dict(adr_files)
    doc_set.update(extra_docs)
    targets = sorted(doc_set.items())  # invariant across assertions; sort once

    # Collect assertions only from numbered ADRs (NNN-*.md).
    assertions = []  # (adr_num, announcer_path, old, new)
    seen = set()     # (announcer_path, old, new) — an ADR restating the same
                     # rename on several lines must not duplicate every finding
    for rpath, lines in sorted(adr_files.items()):
        base = os.path.basename(rpath)
        if not ADR_FILE_RE.match(base):
            continue
        adr_num = base[:3]
        for line in lines:
            parsed = _rename_assertion(line)
            if not parsed:
                continue
            key = (rpath, *parsed)
            if key in seen:
                continue
            seen.add(key)
            assertions.append((adr_num, rpath, *parsed))

    for adr_num, announcer, old, new in assertions:
        # Case-sensitive whole-token match. Lookarounds are used instead of \b
        # because \b needs a word character on one side: names that start or end
        # with '+', '.' or '-' (e.g. "C++", ".NET") would otherwise never match
        # when followed or preceded by whitespace/punctuation.
        old_re = re.compile(r"(?<!\w)" + re.escape(old) + r"(?!\w)")
        for rpath, lines in targets:
            if rpath == announcer:  # the ADR that made the claim is exempt
                continue
            for i, raw in enumerate(lines, 1):
                if not old_re.search(raw):
                    continue
                if new in raw:  # rename is being explained on this line
                    continue
                if RENAME_HIST_RE.search(raw):  # legitimate history / negation
                    continue
                out.append({"check": "rename-incomplete", "severity": "medium",
                            "path": rpath, "line": i,
                            "detail": f"ADR-{adr_num} announced rename '{old}' -> "
                                      f"'{new}' but '{old}' still appears here; confirm the "
                                      "ripple edit landed or soften the ADR claim"})
    return out
# --- rename-incomplete -------------------------------------------------------

def _renames(findings):
    """Keep only the findings produced by the rename-incomplete check."""
    return [f for f in findings if f["check"] == "rename-incomplete"]


def test_rename_incomplete_flags_lingering_old_name():
    # ADR announces `Foo` -> `Bar`; another decisions file still says Foo present-tense.
    adr_files = {
        "docs/decisions/050-rename.md": [
            "## Decision\n", "We renamed `Foo` to `Bar` across the design docs.\n"],
        "docs/decisions/030-other.md": [
            "The Foo proxy renders config from the catalog.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert len(out) == 1
    assert out[0]["path"] == "docs/decisions/030-other.md"
    assert out[0]["line"] == 1
    assert out[0]["severity"] == "medium"
    assert "Foo" in out[0]["detail"] and "Bar" in out[0]["detail"]


def test_rename_incomplete_clean_rename_has_no_findings():
    # The rename announced, and no other doc still mentions Foo.
    adr_files = {
        "docs/decisions/050-rename.md": [
            "## Decision\n", "We renamed `Foo` to `Bar` across the design docs.\n"],
        "docs/decisions/030-other.md": [
            "The Bar proxy renders config from the catalog.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert out == []


def test_rename_incomplete_skips_historical_cue_line():
    # Foo lingers only on lines carrying a historical/negation cue → no finding.
    # Neither line may mention Bar: a line naming the NEW term is skipped by a
    # different rule, which would leave the historical-cue branch untested.
    adr_files = {
        "docs/decisions/050-rename.md": [
            "## Decision\n", "We renamed `Foo` to `Bar` across the design docs.\n"],
        "docs/decisions/030-other.md": [
            "Foo was rejected after the trial.\n",
            "The history of Foo informs the choice.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert out == []


def test_rename_incomplete_skips_announcing_adr_itself():
    # The announcing ADR mentions Foo (it has to) — must not flag itself.
    adr_files = {
        "docs/decisions/050-rename.md": [
            "## Decision\n",
            "We renamed `Foo` to `Bar`.\n",
            "Operators who configured Foo should switch their habits.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert out == []


def test_rename_incomplete_skips_line_naming_new_term():
    # A line that mentions both Foo and Bar is explaining the rename → skipped.
    adr_files = {
        "docs/decisions/050-rename.md": [
            "## Decision\n", "We renamed `Foo` to `Bar`.\n"],
        "docs/decisions/030-other.md": [
            "Foo is being phased out for Bar in this paragraph.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert out == []


def test_rename_incomplete_searches_extra_docs():
    # A lingering OLD name in CAPABILITIES.md (an extra_docs file) is flagged.
    adr_files = {"docs/decisions/050-rename.md": [
        "## Decision\n", "We renamed `Foo` to `Bar`.\n"]}
    extra = {"docs/CAPABILITIES.md": ["The Foo proxy is what we deploy.\n"]}
    out = _renames(rs.rename_incomplete_findings(adr_files, extra))
    assert len(out) == 1
    assert out[0]["path"] == "docs/CAPABILITIES.md"


def test_rename_incomplete_ignores_ambiguous_adr_pointer_assertion():
    # "the ADR-017 prose ... is updated to read Caddy" must NOT parse ADR-017 as the
    # old name (it is a doc pointer). With ADR-017 rejected, no assertion → no finding,
    # even though 'ADR-017' appears in many other docs.
    adr_files = {
        "docs/decisions/024-reverse-proxy.md": [
            "## Consequences\n",
            '- ADR-017 prose that mentioned Traefik is updated to read "Caddy".\n'],
        "docs/decisions/008-testing.md": [
            "Level 4 UI verification follows ADR-017.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert out == []


def test_rename_incomplete_arrow_form_requires_cue():
    # A bare "Old -> New" arrow is not an assertion; the same arrow on a line that
    # also carries a rename cue word is.
    lingering = {"docs/decisions/030-other.md": [
        "The Traefik proxy renders config from the catalog.\n"]}

    no_cue = {"docs/decisions/050-rename.md": [
        "## Decision\n", "Traefik -> Caddy\n"]}
    no_cue.update(lingering)
    assert _renames(rs.rename_incomplete_findings(no_cue, {})) == []

    with_cue = {"docs/decisions/050-rename.md": [
        "## Decision\n", "Rename: Traefik -> Caddy in all docs.\n"]}
    with_cue.update(lingering)
    out = _renames(rs.rename_incomplete_findings(with_cue, {}))
    assert len(out) == 1
    assert out[0]["path"] == "docs/decisions/030-other.md"