feat(tags): checker helpers — tag collection & allowed-set

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sjat 2026-06-06 09:28:03 +02:00
parent 24397fa280
commit b45118dac3
2 changed files with 116 additions and 0 deletions

72
scripts/check-tags.py Normal file
View file

@ -0,0 +1,72 @@
#!/usr/bin/env python3
"""
Validate that every Ansible tag used under roles/ and playbooks/ belongs to the
approved vocabulary. Single source of truth: tests/tags.yml. Rationale: ADR-019.
Allowed set = {role directory names under roles/} ∪ {concerns, special, opt_ins,
playbooks from tests/tags.yml}. Templated tags (containing "{{") are skipped —
they can't be statically validated.
Usage: python3 scripts/check-tags.py
Exit 0 = all tags allowed; exit 1 = unknown tag(s) found.
"""
import pathlib
import sys
import yaml
REPO = pathlib.Path(__file__).resolve().parent.parent
VOCAB_FILE = REPO / "tests" / "tags.yml"
SCAN_DIRS = ("roles", "playbooks")
class _IgnoreUnknownTags(yaml.SafeLoader):
    """SafeLoader that tolerates custom YAML tags (e.g. !vault) instead of crashing."""


def _ignore(loader, tag_suffix, node):
    """Constructor callback that discards the node and always yields ``None``.

    The three parameters are accepted but never read; they are only there to
    match the call signature used for multi-constructors.
    """
    return None


# Register the discard-everything constructor for two tag prefixes: the empty
# prefix and "!". NOTE(review): presumably the empty prefix already catches
# every tag that has no explicit constructor, making the "!" registration a
# belt-and-braces duplicate -- confirm against PyYAML before removing either.
_IgnoreUnknownTags.add_multi_constructor("", _ignore)
_IgnoreUnknownTags.add_multi_constructor("!", _ignore)
def _static_str(value):
return isinstance(value, str) and "{{" not in value
def load_vocab(path=VOCAB_FILE):
    """Load the approved tag vocabulary from the YAML file at *path*.

    The document is read as a mapping, and the entries found under the keys
    ``concerns``, ``special``, ``opt_ins`` and ``playbooks`` are merged into a
    single set. A missing or empty category contributes nothing, and an empty
    document yields an empty set.

    Args:
        path: ``pathlib.Path`` of the vocabulary file (defaults to VOCAB_FILE).

    Returns:
        set: every tag listed under any of the four categories.
    """
    # Decode explicitly as UTF-8 rather than relying on the locale's default
    # encoding, which differs between platforms.
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    vocab = set()
    for key in ("concerns", "special", "opt_ins", "playbooks"):
        entries = data.get(key) or []
        if isinstance(entries, str):
            # A lone scalar (e.g. ``special: always``) is one tag; passing it
            # to set.update() would add each character as a separate tag.
            entries = [entries]
        vocab.update(entries)
    return vocab
def role_names(repo=REPO):
    """Return the names of the directories directly under ``<repo>/roles``.

    Plain files inside ``roles`` are skipped. When ``roles`` does not exist
    (or is not a directory) the result is an empty set.
    """
    found = set()
    base = repo / "roles"
    if base.is_dir():
        for entry in base.iterdir():
            if entry.is_dir():
                found.add(entry.name)
    return found
def collect_tags(node):
    """Gather every static tag string found under any ``tags`` key in *node*.

    *node* is walked recursively through dicts and lists. A ``tags`` value may
    be a single string or a list; entries that are not static strings (see
    ``_static_str``) are left out. Any other kind of node contributes nothing.

    Returns:
        set: the static tag strings found anywhere in the structure.
    """
    found = set()
    if isinstance(node, dict):
        declared = node.get("tags")
        # Treat the scalar form as a one-item list so both forms share a path.
        candidates = declared if isinstance(declared, list) else [declared]
        found.update(tag for tag in candidates if _static_str(tag))
        children = node.values()
    elif isinstance(node, list):
        children = node
    else:
        children = ()
    # Descend into every child, including the value of ``tags`` itself.
    for child in children:
        found |= collect_tags(child)
    return found
if __name__ == "__main__": # pragma: no cover
    # Placeholder entry point: no validation is performed in this block; the
    # script simply exits with status 0 when run directly.
    sys.exit(0)

44
tests/test_check_tags.py Normal file
View file

@ -0,0 +1,44 @@
import importlib.util
import pathlib
# The script's file name ("check-tags.py") contains a hyphen, so it cannot be
# pulled in with a regular ``import`` statement. Load it from its path instead
# and expose the resulting module object as ``ct`` for the tests below.
_PATH = pathlib.Path(__file__).resolve().parent.parent / "scripts" / "check-tags.py"
_spec = importlib.util.spec_from_file_location("check_tags", _PATH)
ct = importlib.util.module_from_spec(_spec)
# Execute the module body so its functions become attributes of ``ct``.
_spec.loader.exec_module(ct)
def test_collect_tags_list_form():
    """A list-valued ``tags`` key yields each of its entries."""
    task = {"name": "t", "tags": ["firewall", "users"]}
    found = ct.collect_tags(task)
    assert found == {"firewall", "users"}
def test_collect_tags_string_form():
    """A scalar ``tags`` value is collected as a single tag."""
    task = {"name": "t", "tags": "always"}
    found = ct.collect_tags(task)
    assert found == {"always"}
def test_collect_tags_nested_blocks_and_roles():
    """Tags nested under ``roles`` and ``block`` are found alongside outer ones."""
    play_with_role = {"hosts": "all", "roles": [{"role": "base", "tags": ["base"]}]}
    tagged_block = {"block": [{"name": "x", "tags": ["config"]}], "tags": ["deploy"]}
    found = ct.collect_tags([play_with_role, tagged_block])
    assert found == {"base", "config", "deploy"}
def test_collect_tags_ignores_templated_values():
    """Entries containing ``{{`` are dropped; static siblings are kept."""
    task = {"tags": ["{{ dynamic }}", "logging"]}
    found = ct.collect_tags(task)
    assert found == {"logging"}
def test_load_vocab_unions_all_categories():
    """The default vocabulary holds tags from several categories at once.

    Reads the vocabulary through ``load_vocab``'s default path, so it depends
    on the contents of that file.
    """
    vocab = ct.load_vocab()
    assert "firewall" in vocab  # concern
    assert "always" in vocab  # special
    assert "bootstrap" in vocab  # playbook identity
    # len() works on the set directly; no need to copy it into a list first.
    assert len(vocab) >= 12
def test_role_names_reads_role_dirs():
    """``role_names`` with its default argument reports the expected role dirs."""
    discovered = ct.role_names()
    for expected in ("base", "docker_host"):
        assert expected in discovered