feat(kaizen): recurrence count + referenced-path existence

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
sjat 2026-06-14 21:17:39 +02:00
parent 72b9262f34
commit c6f66ee634
2 changed files with 62 additions and 3 deletions

View file

@ -18,6 +18,10 @@ FRICTION = os.path.join(REPO_ROOT, "docs", "FRICTION.md")
TAG_RE = re.compile(r"`\[(friction|gotcha|recurring|unused)\]`")
DATE_RE = re.compile(r"(\d{4})-(\d{2})-(\d{2})")
ORDINAL_RE = re.compile(r"(\d+)(?:st|nd|rd|th)\s+(?:occurrence|reinforcement|time)", re.I)
DATELIST_RE = re.compile(r"\((\d{2}-\d{2}(?:/[\d/-]+)+)\)")
BACKTICK_RE = re.compile(r"`([^`]+)`")
PATH_EXTS = (".py", ".yml", ".yaml", ".md", ".sh", ".tf", ".j2", ".toml", ".cfg", ".hcl")
def extract_open_section(text):
@ -62,6 +66,29 @@ def split_signals(section):
return [s for s in signals if s]
def parse_recurrence(text):
    """Best-effort recurrence count from explicit markers; default 1.

    Two kinds of marker are recognised, via the module-level patterns:

    * ``ORDINAL_RE`` -- an ordinal followed by "occurrence", "reinforcement"
      or "time" (e.g. "5th occurrence"); contributes its number.
    * ``DATELIST_RE`` -- a parenthesised, slash-separated date list
      (e.g. "(06-05/06-09/06-10)"); contributes its number of fragments.

    Every marker in ``text`` is inspected, not only the first of each kind,
    and the largest count found is returned. Text without any marker
    yields 1.
    """
    counts = [1]
    # A signal can carry several ordinals as it gets reinforced over time;
    # consider all of them so the highest one wins.
    counts.extend(int(m.group(1)) for m in ORDINAL_RE.finditer(text))
    for m in DATELIST_RE.finditer(text):
        # Count non-empty fragments rather than slashes so that a trailing
        # or doubled "/" does not inflate the result.
        counts.append(sum(1 for part in m.group(1).split("/") if part))
    return max(counts)
def parse_paths(text):
    """Collect backtick-quoted tokens that look like repo paths.

    A token qualifies when, after stripping surrounding whitespace, it
    contains a '/' or ends with one of the suffixes in ``PATH_EXTS``.
    The result lists each qualifying token once, in order of first
    appearance.
    """
    stripped = (hit.group(1).strip() for hit in BACKTICK_RE.finditer(text))
    candidates = (
        token
        for token in stripped
        if "/" in token or token.endswith(PATH_EXTS)
    )
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(candidates))
def parse_signal(raw, today):
"""Turn one raw signal block into a structured dict."""
tag_m = TAG_RE.search(raw)
@ -73,13 +100,15 @@ def parse_signal(raw, today):
else:
first_seen = None
age_days = None
paths = parse_paths(raw)
still_exists = all(os.path.exists(os.path.join(REPO_ROOT, p)) for p in paths) if paths else True
return {
"tag": tag_m.group(1) if tag_m else None,
"first_seen": first_seen,
"age_days": age_days,
"recurrence_count": 1, # refined in Task 3
"referenced_paths": [], # filled in Task 3
"still_exists": True, # filled in Task 3
"recurrence_count": parse_recurrence(raw),
"referenced_paths": paths,
"still_exists": still_exists,
"text": " ".join(raw.split()),
}

View file

@ -59,3 +59,33 @@ def test_parse_signal_handles_missing_date():
assert sig["tag"] == "unused"
assert sig["first_seen"] is None
assert sig["age_days"] is None
def test_recurrence_from_ordinal():
    """The "5th occurrence" ordinal yields 5 even next to a 3-fragment date list."""
    text = "blah 5th occurrence (06-05/06/06) blah"
    count = fs.parse_recurrence(text)
    assert count == 5
def test_recurrence_from_datelist_when_no_ordinal():
    """Without an ordinal, the date-list fragment count is the recurrence."""
    # "06-05", "06-09" and "06-10" are three slash-separated fragments.
    text = "recurred (06-05/06-09/06-10) again"
    count = fs.parse_recurrence(text)
    assert count == 3
def test_recurrence_defaults_to_one():
    """Text with no ordinal and no date list falls back to a count of 1."""
    count = fs.parse_recurrence("a one-off gotcha")
    assert count == 1
def test_parse_paths_picks_repo_paths_only():
    """Tokens with a '/' or a known extension are kept; bare words are not."""
    found = fs.parse_paths("see `scripts/repo-scan.py` and `latest` and `foo.yml`")
    for expected in ("scripts/repo-scan.py", "foo.yml"):
        assert expected in found
    assert "latest" not in found
def test_still_exists_false_for_missing_path():
    """A signal that references a nonexistent file reports still_exists False."""
    raw = "`[unused]` **x** (2026-06-01): `scripts/nope-not-real.py`"
    parsed = fs.parse_signal(raw, TODAY)
    assert parsed["still_exists"] is False
def test_still_exists_true_for_real_path():
    """A signal whose referenced path is found under the repo root reports True."""
    # Relies on scripts/repo-scan.py being present in the checkout.
    raw = "`[gotcha]` **x** (2026-06-01): `scripts/repo-scan.py`"
    parsed = fs.parse_signal(raw, TODAY)
    assert parsed["still_exists"] is True