repo-scan: cut broken-path-ref + marker false positives
- broken-path-ref: skip template/generated-report paths — a placeholder (<service>) immediately following the match, a YYYY-MM-DD date token, or a path under a generated-report reviews/ dir (14 -> 0 on the current tree).
- marker: skip numbered-backlog references (TODO 8.2, TODO-3.1, TODO (2.2, TODO item 16), which point at the backlog and are not code markers (35 -> 2; the remaining two are literal "TODO:" strings in a plan doc). Real code markers (TODO:, FIXME, etc.) are still caught — verified with a synthetic fixture.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
db76be2a63
commit
568729e7bd
1 changed files with 14 additions and 3 deletions
|
|
@@ -24,9 +24,11 @@ PRUNE = {".git", ".venv", ".collections", ".ansible", ".worktrees",
|
||||||
SKIP_PREFIX = os.path.join("docs", "reviews") # don't scan our own reports
|
SKIP_PREFIX = os.path.join("docs", "reviews") # don't scan our own reports
|
||||||
SOURCE_EXTS = {".yml", ".yaml", ".j2", ".py", ".sh", ".md", ".tf", ".cfg", ".ini"}
|
SOURCE_EXTS = {".yml", ".yaml", ".j2", ".py", ".sh", ".md", ".tf", ".cfg", ".ini"}
|
||||||
|
|
||||||
# Marker words, but NOT when part of a regex alternation `(TODO|...)` or a filename
|
# Marker words, but NOT when part of a regex alternation `(TODO|...)`, a filename
|
||||||
# like `TODO.md` / `docs/TODO.md`.
|
# like `TODO.md` / `docs/TODO.md`, or a numbered backlog reference like `TODO 8.2`
|
||||||
MARKER_RE = re.compile(r"(?<![(|/])\b(TODO|FIXME|XXX|HACK)\b(?![|)]|\.\w)")
|
# / `TODO item 16` / `TODO #3` (those point at the backlog, they are not code markers).
|
||||||
|
MARKER_RE = re.compile(
|
||||||
|
r"(?<![(|/])\b(TODO|FIXME|XXX|HACK)\b(?![|)]|\.\w|[\s\-]*\(?\s*(?:item\s+)?#?\d)")
|
||||||
ADR_REF_RE = re.compile(r"\bADR-(\d{3})\b")
|
ADR_REF_RE = re.compile(r"\bADR-(\d{3})\b")
|
||||||
PATH_REF_RE = re.compile(r"(?:docs|scripts|roles|inventories|terraform|playbooks)/[\w./-]+")
|
PATH_REF_RE = re.compile(r"(?:docs|scripts|roles|inventories|terraform|playbooks)/[\w./-]+")
|
||||||
PLACEHOLDER = set("<>*${}")
|
PLACEHOLDER = set("<>*${}")
|
||||||
|
|
@@ -198,6 +200,15 @@ def scan():
|
||||||
ref = pm.group(0).rstrip(".,);:`'\"")
|
ref = pm.group(0).rstrip(".,);:`'\"")
|
||||||
if any(c in ref for c in PLACEHOLDER):
|
if any(c in ref for c in PLACEHOLDER):
|
||||||
continue
|
continue
|
||||||
|
# Skip template / generated-report paths — not real broken refs:
|
||||||
|
# - a placeholder (<service>, ${x}) immediately follows the matched path
|
||||||
|
# (the regex stops at the placeholder, so it isn't caught above)
|
||||||
|
# - a date-template token (YYYY-MM-DD) appears in the path
|
||||||
|
# - the path is under a generated-report `reviews/` directory
|
||||||
|
if (cand[pm.end():pm.end() + 1] in PLACEHOLDER
|
||||||
|
or re.search(r"YYYY|MM-DD", ref)
|
||||||
|
or re.search(r"(?:^|/)reviews/", ref)):
|
||||||
|
continue
|
||||||
if not os.path.exists(os.path.join(ROOT, ref)):
|
if not os.path.exists(os.path.join(ROOT, ref)):
|
||||||
findings.append({"check": "broken-path-ref", "severity": "medium", "path": rpath,
|
findings.append({"check": "broken-path-ref", "severity": "medium", "path": rpath,
|
||||||
"line": i, "detail": f"references '{ref}' which does not exist"})
|
"line": i, "detail": f"references '{ref}' which does not exist"})
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue