From c6f66ee634a0141a59dc98061662067f97de73a7 Mon Sep 17 00:00:00 2001
From: sjat
Date: Sun, 14 Jun 2026 21:17:39 +0200
Subject: [PATCH] feat(kaizen): recurrence count + referenced-path existence

Co-Authored-By: Claude Opus 4.8 (1M context)
---
NOTE(review): line breaks and indentation were reconstructed from a
line-collapsed copy of this patch. Whitespace-only details (spacing before
the inline comments on the removed lines, blank context lines) are
assumptions -- verify against the tree before applying.

 scripts/friction-scan.py    | 35 ++++++++++++++++++++++++++++++++---
 tests/test_friction_scan.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 3 deletions(-)

diff --git a/scripts/friction-scan.py b/scripts/friction-scan.py
index c1a8ae6..66925ff 100644
--- a/scripts/friction-scan.py
+++ b/scripts/friction-scan.py
@@ -18,6 +18,10 @@ FRICTION = os.path.join(REPO_ROOT, "docs", "FRICTION.md")
 
 TAG_RE = re.compile(r"`\[(friction|gotcha|recurring|unused)\]`")
 DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
+ORDINAL_RE = re.compile(r"(\d+)(?:st|nd|rd|th)\s+(?:occurrence|reinforcement|time)", re.I)
+DATELIST_RE = re.compile(r"\((\d{2}-\d{2}(?:/[\d/-]+)+)\)")
+BACKTICK_RE = re.compile(r"`([^`]+)`")
+PATH_EXTS = (".py", ".yml", ".yaml", ".md", ".sh", ".tf", ".j2", ".toml", ".cfg", ".hcl")
 
 
 def extract_open_section(text):
@@ -62,6 +66,29 @@ def split_signals(section):
     return [s for s in signals if s]
 
 
+def parse_recurrence(text):
+    """Best-effort recurrence count from explicit markers; default 1."""
+    counts = [1]
+    m = ORDINAL_RE.search(text)
+    if m:
+        counts.append(int(m.group(1)))
+    dl = DATELIST_RE.search(text)
+    if dl:
+        counts.append(dl.group(1).count("/") + 1)
+    return max(counts)
+
+
+def parse_paths(text):
+    """Backtick tokens that look like repo paths (contain '/' or a known ext)."""
+    out, seen = [], set()
+    for m in BACKTICK_RE.finditer(text):
+        tok = m.group(1).strip()
+        if ("/" in tok or tok.endswith(PATH_EXTS)) and tok not in seen:
+            seen.add(tok)
+            out.append(tok)
+    return out
+
+
 def parse_signal(raw, today):
     """Turn one raw signal block into a structured dict."""
     tag_m = TAG_RE.search(raw)
@@ -73,13 +100,15 @@ def parse_signal(raw, today):
     else:
         first_seen = None
         age_days = None
+    paths = parse_paths(raw)
+    still_exists = all(os.path.exists(os.path.join(REPO_ROOT, p)) for p in paths) if paths else True
     return {
         "tag": tag_m.group(1) if tag_m else None,
         "first_seen": first_seen,
         "age_days": age_days,
-        "recurrence_count": 1,  # refined in Task 3
-        "referenced_paths": [],  # filled in Task 3
-        "still_exists": True,  # filled in Task 3
+        "recurrence_count": parse_recurrence(raw),
+        "referenced_paths": paths,
+        "still_exists": still_exists,
         "text": " ".join(raw.split()),
     }
 
diff --git a/tests/test_friction_scan.py b/tests/test_friction_scan.py
index 2f995d7..7ae437f 100644
--- a/tests/test_friction_scan.py
+++ b/tests/test_friction_scan.py
@@ -59,3 +59,33 @@ def test_parse_signal_handles_missing_date():
     assert sig["tag"] == "unused"
     assert sig["first_seen"] is None
     assert sig["age_days"] is None
+
+
+def test_recurrence_from_ordinal():
+    assert fs.parse_recurrence("blah 5th occurrence (06-05/06/06) blah") == 5
+
+
+def test_recurrence_from_datelist_when_no_ordinal():
+    # three slash-separated date fragments -> recurrence 3
+    assert fs.parse_recurrence("recurred (06-05/06-09/06-10) again") == 3
+
+
+def test_recurrence_defaults_to_one():
+    assert fs.parse_recurrence("a one-off gotcha") == 1
+
+
+def test_parse_paths_picks_repo_paths_only():
+    paths = fs.parse_paths("see `scripts/repo-scan.py` and `latest` and `foo.yml`")
+    assert "scripts/repo-scan.py" in paths
+    assert "foo.yml" in paths
+    assert "latest" not in paths
+
+
+def test_still_exists_false_for_missing_path():
+    sig = fs.parse_signal("`[unused]` **x** (2026-06-01): `scripts/nope-not-real.py`", TODAY)
+    assert sig["still_exists"] is False
+
+
+def test_still_exists_true_for_real_path():
+    sig = fs.parse_signal("`[gotcha]` **x** (2026-06-01): `scripts/repo-scan.py`", TODAY)
+    assert sig["still_exists"] is True