boma/scripts/friction-scan.py
sjat c6f66ee634 feat(kaizen): recurrence count + referenced-path existence
Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-14 21:17:39 +02:00

117 lines
3.8 KiB
Python

#!/usr/bin/env python3
"""Parse docs/FRICTION.md 'Open signals' into structured data for /kaizen.
Stdlib only. Modes:
--json (default): emit the open signals as JSON (Phase-0 input for /kaizen)
--nudge : print a one-line 'loop overdue?' summary
Authoritative design: docs/superpowers/specs/2026-06-14-kaizen-command-design.md
"""
import argparse
import datetime
import json
import os
import re
# Repository root: two directory levels above this script's absolute path
# (i.e. the parent of the directory that contains this file).
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Location of the friction log, resolved relative to REPO_ROOT.
FRICTION = os.path.join(REPO_ROOT, "docs", "FRICTION.md")
# A backtick-wrapped, bracketed signal tag, e.g. `[gotcha]`; group 1 is the tag name.
TAG_RE = re.compile(r"`\[(friction|gotcha|recurring|unused)\]`")
# A YYYY-MM-DD shaped token. Only the digit layout is checked, not whether the
# month/day values form a real calendar date.
DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
# An ordinal such as "3rd occurrence", "2nd reinforcement" or "4th time"
# (case-insensitive); group 1 is the number.
ORDINAL_RE = re.compile(r"(\d+)(?:st|nd|rd|th)\s+(?:occurrence|reinforcement|time)", re.I)
# A parenthesised, slash-separated list that starts with an NN-NN token,
# e.g. "(06-10/06-12/06-14)"; group 1 is the text between the parentheses.
DATELIST_RE = re.compile(r"\((\d{2}-\d{2}(?:/[\d/-]+)+)\)")
# The content of an inline code span (text between a pair of backticks).
BACKTICK_RE = re.compile(r"`([^`]+)`")
# File suffixes that mark a backtick token as path-like even without a '/'.
# Kept as a tuple so it can be passed directly to str.endswith().
PATH_EXTS = (".py", ".yml", ".yaml", ".md", ".sh", ".tf", ".j2", ".toml", ".cfg", ".hcl")
def extract_open_section(text):
    """Extract the body of the 'Open signals' section of a Markdown document.

    The section starts on the line after the first heading whose stripped,
    lower-cased text equals '## open signals' and runs up to, but not
    including, the next line that begins with '## '. When no later heading
    exists the section extends to the end of the text.

    Returns the section body as a newline-joined string, or "" when the
    opening heading is not present.
    """
    rows = text.splitlines()
    # Index of the first matching heading; None means the section is missing.
    heading_at = next(
        (idx for idx, row in enumerate(rows) if row.strip().lower() == "## open signals"),
        None,
    )
    if heading_at is None:
        return ""

    body = []
    for row in rows[heading_at + 1:]:
        # Any following level-2 heading closes the section.
        if row.startswith("## "):
            break
        body.append(row)
    return "\n".join(body)
def split_signals(section):
    """Split the Open-signals body into raw per-signal blocks.

    A signal starts with a top-level '- ' bullet. Blank lines and lines
    indented with a space or a tab are continuations of the current signal.
    Any other line ends the current signal, and text is ignored until the
    next top-level bullet.

    Args:
        section: The section body, e.g. as returned by extract_open_section.

    Returns:
        A list of multi-line strings, one per signal, with the leading '- '
        removed from the first line, every line stripped, and empty signals
        dropped.
    """
    signals = []
    current = None

    def flush():
        # Close the signal being collected, if any.
        if current is not None:
            signals.append("\n".join(current).strip())

    for line in section.splitlines():
        if line.startswith("- "):
            flush()
            current = [line[2:]]
        elif current is not None:
            # Tab indentation counts as a continuation too; previously only a
            # leading space did, so tab-indented detail lines were dropped.
            if not line.strip() or line.startswith((" ", "\t")):
                current.append(line.strip())
            else:
                flush()
                current = None
    flush()
    return [s for s in signals if s]
def parse_recurrence(text):
    """Best-effort recurrence count from explicit markers; default 1.

    Two kinds of marker are recognised:
      * ordinals matched by ORDINAL_RE (e.g. "3rd occurrence") contribute
        their number;
      * parenthesised date lists matched by DATELIST_RE contribute the number
        of slash-separated entries.

    Every marker in the text is considered and the largest value wins, so a
    signal updated from "2nd occurrence" to also mention "4th occurrence"
    reports 4 rather than stopping at the first marker found.

    Returns:
        An int >= 1.
    """
    counts = [1]
    counts.extend(int(m.group(1)) for m in ORDINAL_RE.finditer(text))
    # N slash-separated entries contain N - 1 slashes.
    counts.extend(m.group(1).count("/") + 1 for m in DATELIST_RE.finditer(text))
    return max(counts)
def parse_paths(text):
    """Collect backtick-quoted tokens that look like repository paths.

    A token qualifies when, after stripping surrounding whitespace, it
    contains a '/' or ends with one of the suffixes in PATH_EXTS.

    Returns:
        The qualifying tokens in order of first appearance, without
        duplicates.
    """
    tokens = (match.group(1).strip() for match in BACKTICK_RE.finditer(text))
    path_like = (tok for tok in tokens if "/" in tok or tok.endswith(PATH_EXTS))
    # dict keys keep insertion order, giving an order-preserving de-duplication.
    return list(dict.fromkeys(path_like))
def parse_signal(raw, today):
    """Turn one raw signal block into a structured dict.

    Args:
        raw: The text of a single signal.
        today: Reference date used to compute the signal's age; it is
            subtracted against a datetime.date, so it is expected to be a
            datetime.date as well.

    Returns:
        A dict with the keys:
          tag              - tag name matched by TAG_RE, or None
          first_seen       - first valid YYYY-MM-DD string in the text, or None
          age_days         - days between that date and `today`, or None
          recurrence_count - result of parse_recurrence(raw)
          referenced_paths - result of parse_paths(raw)
          still_exists     - True when every referenced path exists under
                             REPO_ROOT (also True when there are none)
          text             - `raw` with all whitespace runs collapsed to
                             single spaces
    """
    tag_m = TAG_RE.search(raw)

    first_seen = None
    age_days = None
    for date_m in DATE_RE.finditer(raw):
        year, month, day = (int(part) for part in date_m.groups())
        try:
            seen = datetime.date(year, month, day)
        except ValueError:
            # DATE_RE only checks the digit layout; an impossible date such as
            # 2026-13-45 must not abort the scan, so try the next candidate.
            continue
        first_seen = date_m.group(0)
        age_days = (today - seen).days
        break

    paths = parse_paths(raw)
    # all() over an empty iterable is True, which covers the "no paths" case.
    still_exists = all(os.path.exists(os.path.join(REPO_ROOT, p)) for p in paths)

    return {
        "tag": tag_m.group(1) if tag_m else None,
        "first_seen": first_seen,
        "age_days": age_days,
        "recurrence_count": parse_recurrence(raw),
        "referenced_paths": paths,
        "still_exists": still_exists,
        "text": " ".join(raw.split()),
    }
# Script entry point. Currently a placeholder that does nothing: the --json /
# --nudge modes described in the module docstring are not wired up in this
# block yet.
if __name__ == "__main__": # pragma: no cover (filled in Task 4)
    pass