diff --git a/inventories/production/group_vars/offsite_hosts/vars.yml b/inventories/production/group_vars/offsite_hosts/vars.yml index 03047e4..54007b3 100644 --- a/inventories/production/group_vars/offsite_hosts/vars.yml +++ b/inventories/production/group_vars/offsite_hosts/vars.yml @@ -1,17 +1,21 @@ --- # Off-site hosts (askari). askari runs the NetBird coordinator AND is a mesh peer -# (ADR-016, M5). base__mesh_enabled stays true (M5 enrollment). +# (ADR-016, M5). # -# Mesh-hardening 1/3 (move SSH onto wt0 + nftables default-deny) was attempted on -# 2026-06-17 and BACKED OUT after it took askari down: applying base's nftables -# `forward policy drop` to a Docker host broke container forwarding/NAT on reboot, and the -# wt0-only sshd ListenAddress left no break-glass (ip_nonlocal_bind did not beat the boot -# race). Until docker_host ships Docker-safe container-forward rules and the boot-race + -# coordinator-bootstrap issues are re-designed, askari keeps: -# - sshd listening on all interfaces (reachable over the WAN; Hetzner Cloud Firewall is -# the perimeter) — base__ssh_listen_mesh_only stays false, -# - the host nftables firewall NOT applied — base__firewall_apply false. -# See the incident write-up / the mesh-hardening re-spec before re-enabling either. +# Mesh-hardening REDESIGN (2026-06-19): the 2026-06-17 attempt was backed out (forward +# `policy drop` broke Docker on reboot; wt0-only sshd left no break-glass; ip_nonlocal_bind +# did not beat the boot-race). 
The redesign mirrors the proven ubongo 2/3 pattern: +# - INPUT-only default-deny (base__firewall_input_only) — forward stays `policy accept` +# so Docker container forwarding/NAT survive a reboot; +# - SSH scoped by the host firewall (iifname wt0 + admin-addr), NOT an sshd ListenAddress +# change — base__ssh_listen_mesh_only stays false, so there is no boot-race; +# - WAN :22 is DELIBERATELY left open from ubongo's WAN IP (base__firewall_admin_addrs) +# as the permanent non-mesh break-glass — the coordinator-host exception (a host's only +# management path must never depend on a service that host itself hosts). +# Spec: docs/superpowers/specs/2026-06-19-mesh-hardening-askari-redesign-design.md base__mesh_enabled: true -base__ssh_listen_mesh_only: false -base__firewall_apply: false +base__firewall_apply: true +base__firewall_input_only: true # forward stays `policy accept` → Docker-safe +base__ssh_listen_mesh_only: false # no sshd ListenAddress change → no boot-race +base__firewall_admin_addrs: + - 91.226.145.80 # ubongo's (static) WAN IP — the permanent non-mesh SSH break-glass diff --git a/inventories/production/host_vars/askari.yml b/inventories/production/host_vars/askari.yml new file mode 100644 index 0000000..4106f5d --- /dev/null +++ b/inventories/production/host_vars/askari.yml @@ -0,0 +1,7 @@ +--- +# Manage askari over the NetBird mesh (wt0). Overrides the TF-generated WAN `ansible_host` +# in offsite.yml (host_vars are NOT regenerated by tf_to_inventory.py). The WAN :22 path +# (Hetzner Cloud Firewall + base__firewall_admin_addrs = ubongo's WAN IP) stays as the +# break-glass; the Hetzner web console is the IP-independent ultimate fallback. +# Spec: docs/superpowers/specs/2026-06-19-mesh-hardening-askari-redesign-design.md +ansible_host: 100.99.226.39