repo-scan: cut broken-path-ref + marker false positives
- broken-path-ref: skip template/generated-report paths — a placeholder (`<service>`) immediately following the match, a `YYYY-MM-DD` date token, or a path under a generated-report `reviews/` dir (14 -> 0 on the current tree).
- marker: skip numbered-backlog references (`TODO 8.2`, `TODO-3.1`, `TODO (2.2`, `TODO item 16`), which point at the backlog and are not code markers (35 -> 2; the remaining two are literal "TODO:" strings in a plan doc).

Real code markers (`TODO:`, `FIXME`, etc.) are still caught — verified with a synthetic fixture.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
db76be2a63
commit
568729e7bd
1 changed file with 14 additions and 3 deletions
|
|
@@ -24,9 +24,11 @@ PRUNE = {".git", ".venv", ".collections", ".ansible", ".worktrees",
|
|||
SKIP_PREFIX = os.path.join("docs", "reviews") # don't scan our own reports
|
||||
SOURCE_EXTS = {".yml", ".yaml", ".j2", ".py", ".sh", ".md", ".tf", ".cfg", ".ini"}
|
||||
|
||||
# Marker words, but NOT when part of a regex alternation `(TODO|...)` or a filename
|
||||
# like `TODO.md` / `docs/TODO.md`.
|
||||
MARKER_RE = re.compile(r"(?<![(|/])\b(TODO|FIXME|XXX|HACK)\b(?![|)]|\.\w)")
|
||||
# Marker words, but NOT when part of a regex alternation `(TODO|...)`, a filename
|
||||
# like `TODO.md` / `docs/TODO.md`, or a numbered backlog reference like `TODO 8.2`
|
||||
# / `TODO item 16` / `TODO #3` (those point at the backlog, they are not code markers).
|
||||
MARKER_RE = re.compile(
|
||||
r"(?<![(|/])\b(TODO|FIXME|XXX|HACK)\b(?![|)]|\.\w|[\s\-]*\(?\s*(?:item\s+)?#?\d)")
|
||||
ADR_REF_RE = re.compile(r"\bADR-(\d{3})\b")
|
||||
PATH_REF_RE = re.compile(r"(?:docs|scripts|roles|inventories|terraform|playbooks)/[\w./-]+")
|
||||
PLACEHOLDER = set("<>*${}")
|
||||
|
|
@@ -198,6 +200,15 @@ def scan():
|
|||
ref = pm.group(0).rstrip(".,);:`'\"")
|
||||
if any(c in ref for c in PLACEHOLDER):
|
||||
continue
|
||||
# Skip template / generated-report paths — not real broken refs:
|
||||
# - a placeholder (<service>, ${x}) immediately follows the matched path
|
||||
# (the regex stops at the placeholder, so it isn't caught above)
|
||||
# - a date-template token (YYYY-MM-DD) appears in the path
|
||||
# - the path is under a generated-report `reviews/` directory
|
||||
if (cand[pm.end():pm.end() + 1] in PLACEHOLDER
|
||||
or re.search(r"YYYY|MM-DD", ref)
|
||||
or re.search(r"(?:^|/)reviews/", ref)):
|
||||
continue
|
||||
if not os.path.exists(os.path.join(ROOT, ref)):
|
||||
findings.append({"check": "broken-path-ref", "severity": "medium", "path": rpath,
|
||||
"line": i, "detail": f"references '{ref}' which does not exist"})
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue