boma/scripts/friction-scan.py
sjat fd1e83a378 fix(kaizen): scope still_exists to repo paths; test age nudge; tidy --today
- Add REPO_DIRS constant; still_exists now only checks tokens that start
  with a known repo top-level dir, ignoring plugin names (caddy-dns/gandi),
  make command fragments (tf-init/plan), and role-relative paths.
- Add test_still_exists_ignores_non_repo_tokens (was failing before fix).
- Add test_nudge_line_overdue_on_age to close coverage gap on age threshold.
- Add load_signals docstring.
- Replace manual --today date parsing with datetime.date.fromisoformat type
  converter so malformed dates give a clean argparse error.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 21:25:03 +02:00

160 lines
5.7 KiB
Python
Executable file

#!/usr/bin/env python3
"""Parse docs/FRICTION.md 'Open signals' into structured data for /kaizen.
Stdlib only. Modes:
--json (default): emit the open signals as JSON (Phase-0 input for /kaizen)
--nudge : print a one-line 'loop overdue?' summary
Authoritative design: docs/superpowers/specs/2026-06-14-kaizen-command-design.md
"""
import argparse
import datetime
import json
import os
import re
# Repository root: the parent of the directory that contains this script.
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Default input file, docs/FRICTION.md under the repository root.
FRICTION = os.path.join(REPO_ROOT, "docs", "FRICTION.md")
# Nudge thresholds (tunable; the /kaizen self-eval phase revisits these).
NUDGE_MIN_OPEN = 8
NUDGE_MAX_AGE_DAYS = 21
NUDGE_MIN_RECURRENCE = 3
# Top-level repo dirs — only tokens under these are treated as repo-root paths
# for still_exists (avoids false negatives on plugin names, make commands, etc.).
REPO_DIRS = ("roles/", "scripts/", "docs/", "playbooks/", "inventories/", "tests/", "terraform/", ".claude/")
# Signal tag written as a backticked bracket token, e.g. `[gotcha]`.
TAG_RE = re.compile(r"`\[(friction|gotcha|recurring|unused)\]`")
# YYYY-MM-DD shaped digits; the pattern does not check calendar validity.
DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
# Ordinal recurrence marker such as "3rd occurrence" / "2nd time" (case-insensitive).
ORDINAL_RE = re.compile(r"(\d+)(?:st|nd|rd|th)\s+(?:occurrence|reinforcement|time)", re.I)
# Parenthesised, slash-separated list that starts with dd-dd, e.g. "(06-01/06-05)".
DATELIST_RE = re.compile(r"\((\d{2}-\d{2}(?:/[\d/-]+)+)\)")
# Contents of an inline backtick span.
BACKTICK_RE = re.compile(r"`([^`]+)`")
# Suffixes that mark a backtick token as a path even when it has no '/'.
PATH_EXTS = (".py", ".yml", ".yaml", ".md", ".sh", ".tf", ".j2", ".toml", ".cfg", ".hcl")
def extract_open_section(text):
    """Return the text found under the '## Open signals' heading.

    The heading is matched case-insensitively after stripping surrounding
    whitespace. The returned body runs up to, but not including, the next
    line that starts with '## ' (or to the end of the text). An empty string
    is returned when the heading is absent.
    """
    rows = text.splitlines()
    heading_at = next(
        (idx for idx, row in enumerate(rows) if row.strip().lower() == "## open signals"),
        None,
    )
    if heading_at is None:
        return ""

    body = []
    for row in rows[heading_at + 1:]:
        # The next second-level heading closes the section.
        if row.startswith("## "):
            break
        body.append(row)
    return "\n".join(body)
def split_signals(section):
    """Split the Open-signals body into raw per-signal blocks.

    A signal starts with a top-level '- ' bullet. Blank lines and lines
    indented with a space or a tab are continuations of the current signal.
    Any other line closes the current signal, and following lines are ignored
    until the next top-level bullet.

    Returns a list of multi-line strings: the leading '- ' is removed from the
    first line, every line is stripped, and empty blocks are dropped.
    """
    signals = []
    current = None
    for line in section.splitlines():
        if line.startswith("- "):
            if current is not None:
                signals.append("\n".join(current).strip())
            current = [line[2:]]
        elif current is None:
            # Text outside any bullet (intro prose, orphaned indents) is skipped.
            continue
        elif not line.strip() or line.startswith((" ", "\t")):
            # Tab indentation counts as a continuation too; otherwise a
            # tab-indented detail line would silently truncate the signal.
            current.append(line.strip())
        else:
            signals.append("\n".join(current).strip())
            current = None
    if current is not None:
        signals.append("\n".join(current).strip())
    return [s for s in signals if s]
def parse_recurrence(text):
    """Return a best-effort recurrence count from explicit markers.

    Two kinds of marker are recognised: ordinal phrases matched by ORDINAL_RE
    (the number is the count) and parenthesised date lists matched by
    DATELIST_RE (the count is the number of slash-separated entries). Every
    marker in the text is considered and the largest count wins, so a signal
    annotated "2nd occurrence ... 4th occurrence" reports 4. With no markers
    the result is 1.
    """
    counts = [1]
    counts.extend(int(m.group(1)) for m in ORDINAL_RE.finditer(text))
    counts.extend(m.group(1).count("/") + 1 for m in DATELIST_RE.finditer(text))
    return max(counts)
def parse_paths(text):
    """Collect path-like backtick tokens from *text*.

    A token qualifies when, after stripping, it contains '/' or ends with one
    of PATH_EXTS. Tokens are returned in first-appearance order without
    duplicates.
    """
    tokens = (match.group(1).strip() for match in BACKTICK_RE.finditer(text))
    path_like = (tok for tok in tokens if "/" in tok or tok.endswith(PATH_EXTS))
    # dict preserves insertion order, giving an ordered de-duplication.
    return list(dict.fromkeys(path_like))
def parse_signal(raw, today):
    """Turn one raw signal block into a structured dict.

    Args:
        raw: text of one signal, as produced by split_signals.
        today: datetime.date used as the reference for age_days.

    Returns:
        A dict with keys:
        tag -- tag name matched by TAG_RE, or None;
        first_seen -- first valid YYYY-MM-DD string in the text, or None;
        age_days -- days between that date and *today*, or None;
        recurrence_count -- result of parse_recurrence;
        referenced_paths -- result of parse_paths;
        still_exists -- True when every referenced path under REPO_DIRS
            exists below REPO_ROOT (also True when there are none);
        text -- the signal with all whitespace runs collapsed to one space.
    """
    tag_m = TAG_RE.search(raw)

    first_seen = None
    age_days = None
    for date_m in DATE_RE.finditer(raw):
        try:
            seen = datetime.date(*(int(part) for part in date_m.groups()))
        except ValueError:
            # DATE_RE only checks the digit shape; skip tokens such as
            # "2026-13-45" rather than aborting the whole scan.
            continue
        first_seen = date_m.group(0)
        age_days = (today - seen).days
        break

    paths = parse_paths(raw)
    # Only tokens under a known top-level dir are checked on disk.
    repo_paths = [p for p in paths if p.startswith(REPO_DIRS)]
    # all() over an empty sequence is True, covering the "no repo paths" case.
    still_exists = all(os.path.exists(os.path.join(REPO_ROOT, p)) for p in repo_paths)
    return {
        "tag": tag_m.group(1) if tag_m else None,
        "first_seen": first_seen,
        "age_days": age_days,
        "recurrence_count": parse_recurrence(raw),
        "referenced_paths": paths,
        "still_exists": still_exists,
        "text": " ".join(raw.split()),
    }
def load_signals(path, today):
    """Parse the Open signals of the FRICTION.md file at *path*.

    The file is read as UTF-8. Each signal block found in the Open signals
    section is converted by parse_signal, with *today* as the reference date.
    Returns the list of resulting dicts.
    """
    with open(path, encoding="utf-8") as handle:
        section = extract_open_section(handle.read())
    parsed = []
    for block in split_signals(section):
        parsed.append(parse_signal(block, today))
    return parsed
def nudge_line(signals):
    """Build the one-line 'loop overdue?' summary for parsed *signals*.

    The loop is reported as overdue when the number of open signals, the
    oldest age in days, or the highest recurrence count reaches its NUDGE_*
    threshold. Signals without an age are ignored for the age figure, which
    falls back to 0 when no signal has one.
    """
    n = len(signals)
    oldest = max(
        (sig["age_days"] for sig in signals if sig.get("age_days") is not None),
        default=0,
    )
    recurrences = [sig["recurrence_count"] for sig in signals]
    max_rec = max(recurrences) if recurrences else 0

    triggers = (
        n >= NUDGE_MIN_OPEN,
        oldest >= NUDGE_MAX_AGE_DAYS,
        max_rec >= NUDGE_MIN_RECURRENCE,
    )
    if any(triggers):
        status = "OVERDUE — run /kaizen"
    else:
        status = "ok"
    return f"kaizen: {n} open signals, oldest {oldest}d, max recurrence {max_rec}x — {status}"
def main():
    """Command-line entry point.

    Reads the FRICTION.md file given by --file (default: FRICTION) and prints
    either the parsed signals as indented JSON (the default, also selectable
    explicitly with --json) or, with --nudge, the one-line overdue summary.
    --today overrides the reference date; a malformed value is rejected by
    argparse.
    """
    parser = argparse.ArgumentParser(description="Parse FRICTION.md Open signals for /kaizen.")
    # The module docstring documents --json as the default mode, so accept it
    # explicitly instead of failing with "unrecognized arguments".
    mode = parser.add_mutually_exclusive_group()
    mode.add_argument("--json", action="store_true", help="emit the open signals as JSON (default)")
    mode.add_argument("--nudge", action="store_true", help="print a one-line overdue summary")
    parser.add_argument("--today", type=datetime.date.fromisoformat,
                        help="override today's date (YYYY-MM-DD) for testing")
    parser.add_argument("--file", default=FRICTION, help="path to FRICTION.md")
    args = parser.parse_args()

    today = args.today or datetime.date.today()
    signals = load_signals(args.file, today)
    if args.nudge:
        print(nudge_line(signals))
    else:
        print(json.dumps(signals, indent=2))


if __name__ == "__main__":
    main()