feat(scan): repo-scan rename-incomplete check (kaizen)
When a numbered ADR announces a rename Old->New, flag design-doc lines where Old still appears in present tense — skipping the announcing ADR, lines that also name New, and historical/negation cues, and rejecting ADR-NNN tokens as terms. Structural cousin of stale-deferred; run by /review-repo. Zero findings on the current tree (the Traefik->Caddy ripple edits have landed). Consumes the 2026-06-14 KEEP-OPEN signal in docs/FRICTION.md. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
959f9b30b5
commit
b0c0150db2
2 changed files with 216 additions and 0 deletions
|
|
@ -41,6 +41,42 @@ LIST_ITEM_RE = re.compile(r"^\s*(\d+\.|[-*+])\s+(.*)")
|
|||
DEFER_REF_RE = re.compile(r"ADR-(\d{3})\D{0,40}?deferred\D{0,12}?(\d+)", re.I)
|
||||
RESOLVE_WORD_RE = re.compile(r"\b(?:resolv\w*|decid\w*|address\w*|complet\w*|done)\b", re.I)
|
||||
|
||||
# Rename-incomplete detection: an ADR announces a rename/supersession of a named
# term (Old → New); verify the OLD name no longer lingers in the design-doc set.
# (The structural cousin of stale-deferred — see docs/FRICTION.md, ADR-024.)
#
# A "specific" name is a backticked token or a capitalised proper-noun/identifier.
# The capitalised branch is deliberately case-SENSITIVE: an ordinary lowercase
# word ("polling", "webhooks") must never be captured as a renamed term.
_NAME = r"(?:`[^`]+`|[A-Z][A-Za-z0-9_+.-]{2,})"

# Common connective words that can still be captured as a name (sentence-initial
# capitals such as "The", or backticked tokens); they are rejected after capture
# so they can't be mistaken for a tool name. Compared against the lowercased token.
RENAME_STOPWORDS = {
    "was", "were", "the", "this", "that", "with", "from", "into", "and", "but",
    "for", "are", "has", "had", "been", "now", "not", "all", "any", "use", "used",
    "via", "per", "its", "our", "one", "two", "old", "new", "phase", "step",
    "adr", "read", "name", "term", "tool", "prose", "roadmap",
}

# Trigger forms — each captures (old, new) as raw name tokens in groups 1 and 2.
# Only the connective words are case-insensitive, via scoped inline flags
# "(?i:...)"; a pattern-wide re.I would also relax the [A-Z] in _NAME and let any
# lowercase word of three or more characters pass as a name.
RENAME_ASSERT_RES = (
    # renamed [from] X to Y
    re.compile(rf"(?i:renamed)\s+(?:(?i:from)\s+)?({_NAME})\s+(?i:to)\s+({_NAME})"),
    # replaced X with Y
    re.compile(rf"(?i:replac\w*)\s+({_NAME})\s+(?i:with)\s+({_NAME})"),
    # superseded X with/by Y
    re.compile(rf"(?i:supersed\w*)\s+({_NAME})\s+(?i:with|by)\s+({_NAME})"),
    # X ... (is/are/was/were/been) updated to read Y
    re.compile(rf"({_NAME})\b.{{0,40}}?\b(?i:is|are|was|were|been)?\s*"
               rf"(?i:updated\s+to\s+read)\s+[\"']?({_NAME})"),
    # X → Y / X -> Y. This pattern alone does not require a cue word; consumers
    # pair it with RENAME_CUE_RE (see RENAME_ARROW_RES below).
    re.compile(rf"({_NAME})\s*(?:->|→)\s*({_NAME})"),
)

# The arrow form(s): a match only counts when the line also carries a cue word.
RENAME_ARROW_RES = (RENAME_ASSERT_RES[-1],)

# Cue words that make an arrow line a rename assertion ("renam\w*" already
# covers "rename", "renamed", "renaming", ...).
RENAME_CUE_RE = re.compile(r"\b(?:renam\w*|replac\w*|supersed\w*|updated)\b", re.I)

# Historical / negation cues — a lingering OLD name on such a line is legitimate
# history, not a missed ripple edit, so it is skipped. Matched case-insensitively
# (so "V4" also matches "v4").
RENAME_HIST_RE = re.compile(
    r"\b(?:was|were|formerly|previously|no longer|instead of|rather than|reject\w*|"
    r"reconsider\w*|supersed\w*|deprecat\w*|legacy|history|heritage|V4|"
    r"actually ran|used to)\b", re.I)
|
||||
|
||||
# ADR-structure check (ADR-023): numbered ADRs must carry the four mandatory
|
||||
# sections and a parseable Status line. Presence only — section ORDER is a
|
||||
# template-demonstrated convention, not machine-enforced.
|
||||
|
|
@ -142,6 +178,84 @@ def adr_structure_findings(adr_files):
|
|||
return out
|
||||
|
||||
|
||||
def _clean_name(tok):
    """Normalise a captured name token and decide whether it is a usable term.

    Strips surrounding whitespace, backticks and quotes, then trailing sentence
    punctuation. Returns the bare name, or None when the result is not a
    'specific' token: empty, multi-word, a stopword, an ADR reference, or
    anything other than three or more characters from ``[A-Za-z0-9_+.-]``.
    """
    s = tok.strip().strip("`\"'").strip()
    s = s.rstrip(".,;:!?)")  # trailing sentence punctuation is not part of the name
    if not s or " " in s:
        return None
    if s.lower() in RENAME_STOPWORDS:
        return None
    # An ADR reference (ADR-017) is a document pointer, never the renamed *term* — a
    # sentence like "the ADR-017 prose ... is updated to read Caddy" must not parse
    # ADR-017 as the old name. Reject it so such lines skip (precision >> recall).
    # Case-insensitive, so a lowercase or backticked "adr-017" is rejected as well.
    if re.fullmatch(r"ADR-\d{3}", s, re.I):
        return None
    # After-stripping re-check of the identifier shape: a backticked capture may
    # contain arbitrary characters, so this is where those get filtered out.
    if not re.fullmatch(r"[A-Za-z0-9_+.-]{3,}", s):
        return None
    return s
|
||||
|
||||
|
||||
def _rename_assertion(line):
    """Extract one Old→New rename assertion from a single ADR line.

    Each trigger pattern in RENAME_ASSERT_RES is tried in order, using only its
    first match on the line. Returns ``(old, new)`` as cleaned names for the
    first pattern that yields two distinct specific names, otherwise None.
    Conservative by design: precision >> recall.
    """
    for pattern in RENAME_ASSERT_RES:
        match = pattern.search(line)
        if match is None:
            continue
        # A bare arrow is too weak a signal; it needs a rename/supersede cue
        # somewhere on the same line.
        is_arrow_form = pattern in RENAME_ARROW_RES
        if is_arrow_form and RENAME_CUE_RE.search(line) is None:
            continue
        old_name = _clean_name(match.group(1))
        new_name = _clean_name(match.group(2))
        if not old_name or not new_name or old_name == new_name:
            continue
        return old_name, new_name
    return None
|
||||
|
||||
|
||||
def rename_incomplete_findings(adr_files, extra_docs):
    """Flag design-doc lines where a renamed term's OLD name still lingers.

    adr_files: {rel_path: [lines]} for docs/decisions/*.md (the numbered ADRs make
    the assertions). extra_docs: {rel_path: [lines]} for CAPABILITIES.md / ROADMAP.md.

    When a numbered ADR announces a rename 'Old' -> 'New', every line of every
    other document in the combined set is flagged if 'Old' appears on it as a
    whole token, unless the line also contains 'New' (substring test) or carries
    a historical/negation cue (RENAME_HIST_RE). The announcing ADR is exempt.

    Returns a list of finding dicts with keys "check" ("rename-incomplete"),
    "severity" ("medium"), "path", "line" (1-based) and "detail".
    """
    # The design-doc set we search: all decisions/*.md plus the two extra docs.
    doc_set = dict(adr_files)
    doc_set.update(extra_docs)

    # Collect assertions only from numbered ADRs (NNN-*.md). An ADR that states
    # the same rename on several lines contributes it once, so a lingering line
    # is not reported multiple times.
    assertions = []  # (adr_num, announcer_path, old, new), first-seen order
    seen = set()
    for rpath, lines in sorted(adr_files.items()):
        base = os.path.basename(rpath)
        if not ADR_FILE_RE.match(base):
            continue
        adr_num = base[:3]
        for line in lines:
            parsed = _rename_assertion(line)
            if not parsed:
                continue
            key = (adr_num, rpath, parsed[0], parsed[1])
            if key in seen:
                continue
            seen.add(key)
            assertions.append(key)

    out = []
    for adr_num, announcer, old, new in assertions:
        # Case-sensitive whole-token match. Lookarounds are used instead of \b
        # because a name may start or end with a non-word character ("C++",
        # ".NET"); \b next to such a character never matches before whitespace.
        old_re = re.compile(r"(?<!\w)" + re.escape(old) + r"(?!\w)")
        for rpath, lines in sorted(doc_set.items()):
            if rpath == announcer:  # the ADR that made the claim is exempt
                continue
            for i, raw in enumerate(lines, 1):
                if not old_re.search(raw):
                    continue
                if new in raw:  # rename is being explained on this line
                    continue
                if RENAME_HIST_RE.search(raw):  # legitimate history / negation
                    continue
                out.append({"check": "rename-incomplete", "severity": "medium",
                            "path": rpath, "line": i,
                            "detail": f"ADR-{adr_num} announced rename '{old}' -> "
                                      f"'{new}' but '{old}' still appears here; confirm the "
                                      "ripple edit landed or soften the ADR claim"})
    return out
|
||||
|
||||
|
||||
def walk_files():
|
||||
for dirpath, dirnames, filenames in os.walk(ROOT):
|
||||
dirnames[:] = [d for d in dirnames if d not in PRUNE]
|
||||
|
|
@ -192,8 +306,11 @@ def scan():
|
|||
findings = []
|
||||
adrs = adr_numbers()
|
||||
adr_files = {} # docs/decisions/*.md → lines, for deferred-section parsing
|
||||
extra_docs = {} # CAPABILITIES.md / ROADMAP.md → lines, for rename-incomplete
|
||||
defer_refs = [] # repo-wide "resolves ADR-NNN deferred #K" references
|
||||
decisions_dir = os.path.join("docs", "decisions")
|
||||
rename_extra = {os.path.join("docs", "CAPABILITIES.md"),
|
||||
os.path.join("docs", "ROADMAP.md")}
|
||||
for path in walk_files():
|
||||
rpath = rel(path)
|
||||
if rpath.startswith(SKIP_PREFIX):
|
||||
|
|
@ -223,6 +340,8 @@ def scan():
|
|||
|
||||
if rpath.startswith(decisions_dir) and rpath.endswith(".md"):
|
||||
adr_files[rpath] = lines
|
||||
if rpath in rename_extra:
|
||||
extra_docs[rpath] = lines
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
for m in DEFER_REF_RE.finditer(line):
|
||||
|
|
@ -261,6 +380,7 @@ def scan():
|
|||
"line": i, "detail": f"references '{ref}' which does not exist"})
|
||||
findings.extend(deferred_findings(adr_files, defer_refs))
|
||||
findings.extend(adr_structure_findings(adr_files))
|
||||
findings.extend(rename_incomplete_findings(adr_files, extra_docs))
|
||||
return findings
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -57,3 +57,99 @@ def test_non_numbered_file_is_skipped():
|
|||
bare = ["# ADR template\n", "\n", "## Status\n", "\n", "<!-- hint -->\n"]
|
||||
out = _checks(rs.adr_structure_findings({"docs/decisions/adr-template.md": bare}))
|
||||
assert out == []
|
||||
|
||||
|
||||
# --- rename-incomplete -------------------------------------------------------
|
||||
|
||||
def _renames(findings):
|
||||
return [f for f in findings if f["check"] == "rename-incomplete"]
|
||||
|
||||
|
||||
def test_rename_incomplete_flags_lingering_old_name():
    # One ADR announces `Foo` -> `Bar`; a second decisions file still uses Foo in
    # the present tense and must be the single reported location.
    decisions = {
        "docs/decisions/050-rename.md": [
            "## Decision\n", "We renamed `Foo` to `Bar` across the design docs.\n"],
        "docs/decisions/030-other.md": [
            "The Foo proxy renders config from the catalog.\n"],
    }
    no_extra_docs = {}  # CAPABILITIES/ROADMAP are not part of this scenario
    hits = _renames(rs.rename_incomplete_findings(decisions, no_extra_docs))
    assert len(hits) == 1
    hit = hits[0]
    assert hit["path"] == "docs/decisions/030-other.md"
    assert hit["line"] == 1
    assert hit["severity"] == "medium"
    assert "Foo" in hit["detail"] and "Bar" in hit["detail"]
|
||||
|
||||
|
||||
def test_rename_incomplete_clean_rename_has_no_findings():
    # The rename is announced and the only other doc already uses the new name.
    announcing_adr = ["## Decision\n", "We renamed `Foo` to `Bar` across the design docs.\n"]
    updated_adr = ["The Bar proxy renders config from the catalog.\n"]
    decisions = {
        "docs/decisions/050-rename.md": announcing_adr,
        "docs/decisions/030-other.md": updated_adr,
    }
    hits = _renames(rs.rename_incomplete_findings(decisions, {}))
    assert hits == []
|
||||
|
||||
|
||||
def test_rename_incomplete_skips_historical_cue_line():
    # Foo lingers only on lines carrying a historical/negation cue → no finding.
    # Neither fixture line mentions Bar, so the skip can only come from the cue
    # words ("was"/"rejected", "history") and not from the new-name exemption.
    adr_files = {
        "docs/decisions/050-rename.md": [
            "## Decision\n", "We renamed `Foo` to `Bar` across the design docs.\n"],
        "docs/decisions/030-other.md": [
            "Foo was rejected after the evaluation spike.\n",
            "The history of Foo informs the choice.\n"],
    }
    out = _renames(rs.rename_incomplete_findings(adr_files, {}))
    assert out == []
|
||||
|
||||
|
||||
def test_rename_incomplete_skips_announcing_adr_itself():
    # The ADR that announces the rename necessarily mentions Foo; it is exempt
    # from its own check, so a lone announcing ADR yields nothing.
    announcing_lines = [
        "## Decision\n",
        "We renamed `Foo` to `Bar`.\n",
        "Operators who configured Foo should switch their habits.\n",
    ]
    decisions = {"docs/decisions/050-rename.md": announcing_lines}
    hits = _renames(rs.rename_incomplete_findings(decisions, {}))
    assert hits == []
|
||||
|
||||
|
||||
def test_rename_incomplete_skips_line_naming_new_term():
    # A line naming both Foo and Bar is treated as explaining the rename → skipped.
    decisions = {}
    decisions["docs/decisions/050-rename.md"] = [
        "## Decision\n", "We renamed `Foo` to `Bar`.\n"]
    decisions["docs/decisions/030-other.md"] = [
        "Foo is being phased out for Bar in this paragraph.\n"]
    hits = _renames(rs.rename_incomplete_findings(decisions, {}))
    assert hits == []
|
||||
|
||||
|
||||
def test_rename_incomplete_searches_extra_docs():
    # The extra docs (here CAPABILITIES.md) are searched too: a lingering OLD
    # name there is reported against that file.
    decisions = {
        "docs/decisions/050-rename.md": ["## Decision\n", "We renamed `Foo` to `Bar`.\n"],
    }
    capabilities = {"docs/CAPABILITIES.md": ["The Foo proxy is what we deploy.\n"]}
    hits = _renames(rs.rename_incomplete_findings(decisions, capabilities))
    assert len(hits) == 1
    assert hits[0]["path"] == "docs/CAPABILITIES.md"
|
||||
|
||||
|
||||
def test_rename_incomplete_ignores_ambiguous_adr_pointer_assertion():
    # In "ADR-017 prose ... is updated to read Caddy" the token ADR-017 is a
    # document pointer, not a renamed term. It must be rejected as the old name,
    # leaving no assertion and therefore no finding — even though another doc
    # legitimately references ADR-017.
    pointer_line = '- ADR-017 prose that mentioned Traefik is updated to read "Caddy".\n'
    decisions = {
        "docs/decisions/024-reverse-proxy.md": ["## Consequences\n", pointer_line],
        "docs/decisions/008-testing.md": ["Level 4 UI verification follows ADR-017.\n"],
    }
    hits = _renames(rs.rename_incomplete_findings(decisions, {}))
    assert hits == []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue