From 568729e7bdc1ac370427dab5733bc11be894d2c0 Mon Sep 17 00:00:00 2001 From: sjat Date: Fri, 5 Jun 2026 20:37:40 +0200 Subject: [PATCH] repo-scan: cut broken-path-ref + marker false positives MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - broken-path-ref: skip template/generated-report paths — a placeholder (e.g. ${x}) immediately following the match, a YYYY-MM-DD date token, or a path under a generated-report reviews/ dir (14 -> 0 on the current tree). - marker: skip numbered-backlog references (TODO 8.2, TODO-3.1, TODO (2.2, TODO item 16) which point at the backlog, not code markers (35 -> 2; the remaining two are literal "TODO:" strings in a plan doc). Real code markers (TODO:, FIXME, etc.) are still caught — verified with a synthetic fixture. Co-Authored-By: Claude Opus 4.8 (1M context) --- scripts/repo-scan.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/scripts/repo-scan.py b/scripts/repo-scan.py index 6bea156..6146041 100644 --- a/scripts/repo-scan.py +++ b/scripts/repo-scan.py @@ -24,9 +24,11 @@ PRUNE = {".git", ".venv", ".collections", ".ansible", ".worktrees", SKIP_PREFIX = os.path.join("docs", "reviews") # don't scan our own reports SOURCE_EXTS = {".yml", ".yaml", ".j2", ".py", ".sh", ".md", ".tf", ".cfg", ".ini"} -# Marker words, but NOT when part of a regex alternation `(TODO|...)` or a filename -# like `TODO.md` / `docs/TODO.md`. 
-MARKER_RE = re.compile(r"(?*${}") @@ -198,6 +200,15 @@ def scan(): ref = pm.group(0).rstrip(".,);:`'\"") if any(c in ref for c in PLACEHOLDER): continue + # Skip template / generated-report paths — not real broken refs: + # - a placeholder (, ${x}) immediately follows the matched path + # (the regex stops at the placeholder, so it isn't caught above) + # - a date-template token (YYYY-MM-DD) appears in the path + # - the path is under a generated-report `reviews/` directory + if (cand[pm.end():pm.end() + 1] in PLACEHOLDER + or re.search(r"YYYY|MM-DD", ref) + or re.search(r"(?:^|/)reviews/", ref)): + continue if not os.path.exists(os.path.join(ROOT, ref)): findings.append({"check": "broken-path-ref", "severity": "medium", "path": rpath, "line": i, "detail": f"references '{ref}' which does not exist"})