# boma/scripts/friction-scan.py

#!/usr/bin/env python3
"""Parse docs/FRICTION.md 'Open signals' into structured data for /kaizen.

Stdlib only. Modes:
  --json  (default): emit the open signals as JSON (Phase-0 input for /kaizen)
  --nudge          : print a one-line 'loop overdue?' summary

Authoritative design: docs/superpowers/specs/2026-06-14-kaizen-command-design.md
"""
import argparse
import datetime
import json
import os
import re

# Parent of the directory containing this file (two dirname() hops up from
# the file's absolute path).
REPO_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
# Path to docs/FRICTION.md under REPO_ROOT.
FRICTION = os.path.join(REPO_ROOT, "docs", "FRICTION.md")

# Backtick-wrapped bracket tag such as `[gotcha]`; group 1 is the tag name.
TAG_RE = re.compile(r"`\[(friction|gotcha|recurring|unused)\]`")
# YYYY-MM-DD digit shape (groups: year, month, day). Shape only: the pattern
# does not check that the digits form a real calendar date.
DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
# Ordinal phrase such as "3rd occurrence" / "2nd time" (case-insensitive);
# group 1 is the number.
ORDINAL_RE = re.compile(r"(\d+)(?:st|nd|rd|th)\s+(?:occurrence|reinforcement|time)", re.I)
# Parenthesised list that starts with NN-NN and continues with one or more
# "/"-introduced runs of digits, slashes and hyphens, e.g. "(06-01/06-08)";
# group 1 is the text between the parentheses.
DATELIST_RE = re.compile(r"\((\d{2}-\d{2}(?:/[\d/-]+)+)\)")
# Any non-empty run of text between two backticks; group 1 is the content.
BACKTICK_RE = re.compile(r"`([^`]+)`")
# Filename suffixes that mark a backtick token as path-like even when it
# contains no "/" (a tuple, so it can be passed straight to str.endswith).
PATH_EXTS = (".py", ".yml", ".yaml", ".md", ".sh", ".tf", ".j2", ".toml", ".cfg", ".hcl")


def extract_open_section(text):
    """Return the text under the '## Open signals' heading.

    The heading is matched case-insensitively after stripping surrounding
    whitespace. The returned body runs from the line after the heading up to
    (not including) the next line that starts with '## ', or to the end of
    the text when no such line follows. An empty string is returned when the
    heading is absent.
    """
    rows = text.splitlines()
    heading_at = next(
        (idx for idx, row in enumerate(rows) if row.strip().lower() == "## open signals"),
        None,
    )
    if heading_at is None:
        return ""

    body = []
    for row in rows[heading_at + 1:]:
        # Only an unindented level-2 heading closes the section; deeper
        # headings ('### ...') stay part of the body.
        if row.startswith("## "):
            break
        body.append(row)
    return "\n".join(body)


def split_signals(section):
    """Split the Open-signals body into raw per-signal blocks.

    A signal starts with a top-level '- ' bullet. Blank lines and lines
    indented with at least two spaces or a tab are continuations of the
    current signal. Any other line ends the current signal, and everything
    up to the next top-level bullet is ignored.

    Returns a list of multi-line strings, one per signal, with the leading
    '- ' removed from the first line, every line stripped of surrounding
    whitespace, and empty signals dropped.
    """
    blocks = []
    current = None
    for line in section.splitlines():
        if line.startswith("- "):
            if current is not None:
                blocks.append(current)
            current = [line[2:]]
        elif current is None:
            # Not inside a signal: skip until the next top-level bullet.
            continue
        elif not line.strip() or line.startswith(("  ", "\t")):
            # Tab-indented continuations are accepted alongside space-indented
            # ones so they do not silently truncate the signal.
            current.append(line.strip())
        else:
            blocks.append(current)
            current = None
    if current is not None:
        blocks.append(current)

    joined = ("\n".join(block).strip() for block in blocks)
    return [signal for signal in joined if signal]


def parse_recurrence(text):
    """Best-effort recurrence count from explicit markers; default 1.

    Two kinds of marker are recognised through the module-level patterns:

    * ordinal phrases matched by ORDINAL_RE (e.g. "3rd occurrence"), each
      contributing its number;
    * parenthesised date lists matched by DATELIST_RE (e.g. "(06-01/06-08)"),
      each contributing one more than its number of "/" separators.

    Every marker in the text is considered, not just the first of each kind,
    so a later "4th occurrence" is not hidden by an earlier "2nd occurrence".
    Returns the largest count found, and never less than 1.
    """
    counts = [1]
    counts.extend(int(m.group(1)) for m in ORDINAL_RE.finditer(text))
    counts.extend(m.group(1).count("/") + 1 for m in DATELIST_RE.finditer(text))
    return max(counts)


def parse_paths(text):
    """Collect backtick-quoted tokens that look like repo paths.

    A token counts as path-like when, after stripping surrounding
    whitespace, it contains '/' or ends with one of PATH_EXTS. Tokens are
    returned in order of first appearance with duplicates removed.
    """
    candidates = (match.group(1).strip() for match in BACKTICK_RE.finditer(text))
    path_like = (
        token for token in candidates
        if "/" in token or token.endswith(PATH_EXTS)
    )
    # dict.fromkeys keeps first-seen order while discarding repeats.
    return list(dict.fromkeys(path_like))


def parse_signal(raw, today):
    """Turn one raw signal block into a structured dict.

    Args:
        raw: Text of a single signal.
        today: Reference date used to compute the signal's age; it is
            subtracted from with a ``datetime.date``, so it is expected to
            be a ``datetime.date`` itself.

    Returns:
        A dict with these keys:
          tag: Tag name captured by TAG_RE, or None when no tag is present.
          first_seen: First YYYY-MM-DD token in ``raw`` that is a real
            calendar date, or None when there is none.
          age_days: Days from ``first_seen`` to ``today`` (negative when the
            date lies after ``today``), or None when ``first_seen`` is None.
          recurrence_count: Result of parse_recurrence(raw).
          referenced_paths: Result of parse_paths(raw).
          still_exists: True when every referenced path exists relative to
            REPO_ROOT; also True when no paths are referenced.
          text: ``raw`` with all whitespace runs collapsed to single spaces.
    """
    tag_m = TAG_RE.search(raw)

    first_seen = None
    age_days = None
    for date_m in DATE_RE.finditer(raw):
        try:
            seen = datetime.date(*(int(part) for part in date_m.groups()))
        except ValueError:
            # DATE_RE only checks the digit shape, so tokens such as
            # "2026-13-45" can match; skip them rather than abort the scan.
            continue
        first_seen = date_m.group(0)
        age_days = (today - seen).days
        break

    paths = parse_paths(raw)
    # all() over an empty sequence is True, which covers the no-paths case.
    still_exists = all(os.path.exists(os.path.join(REPO_ROOT, p)) for p in paths)
    return {
        "tag": tag_m.group(1) if tag_m else None,
        "first_seen": first_seen,
        "age_days": age_days,
        "recurrence_count": parse_recurrence(raw),
        "referenced_paths": paths,
        "still_exists": still_exists,
        "text": " ".join(raw.split()),
    }


if __name__ == "__main__":  # pragma: no cover (filled in Task 4)
    # Placeholder entry point: running this file as a script currently does
    # nothing; no command-line handling is implemented in this block yet.
    pass