feat(base): pin the NetBird coordinator FQDN in /etc/hosts (mesh DNS-resilience)
Adds base__mesh_coordinator_pin (default empty = no-op). When set + base__mesh_enabled, a lineinfile task writes "<ip> <fqdn>" to /etc/hosts so a managed mesh host survives a local-DNS hiccup (the 2026-06-18 incident class). FQDN derived from base__mesh_management_url via regex_replace (no community.general). Gated on base__mesh_enabled | bool and pin length; the coordinator host (askari/offsite_hosts) stays exempt. Production pin wired for ubongo (77.42.120.136). Molecule dns_servers fix included (Docker/NetBird DNS incompatibility). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0286c78f36
commit
f83d68d7a0
6 changed files with 43 additions and 0 deletions
|
|
@ -27,6 +27,12 @@ base__mesh_enabled: true
|
||||||
# ssh-from-control self-path (base__firewall_control_addr, group_vars/all = 10.20.10.151), or
|
# ssh-from-control self-path (base__firewall_control_addr, group_vars/all = 10.20.10.151), or
|
||||||
# mamba on the LAN. Break-glass: the physical console. (base__firewall_apply defaults true.)
|
# mamba on the LAN. Break-glass: the physical console. (base__firewall_apply defaults true.)
|
||||||
base__firewall_input_only: true
|
base__firewall_input_only: true
|
||||||
|
|
||||||
|
# DNS-resilience (ADR-016 availability / R8): pin the coordinator FQDN to askari's stable WAN
|
||||||
|
# IP in /etc/hosts so a local-DNS hiccup (the 2026-06-18 incident class) can't strand ubongo's
|
||||||
|
# mesh. askari (offsite_hosts) is exempt — it reaches the coordinator locally.
|
||||||
|
base__mesh_coordinator_pin: "77.42.120.136"
|
||||||
|
|
||||||
base__firewall_admin_addrs:
|
base__firewall_admin_addrs:
|
||||||
- "10.20.10.50" # mamba over the LAN (NetBird off). Raw DHCP lease — revisit with an
|
- "10.20.10.50" # mamba over the LAN (NetBird off). Raw DHCP lease — revisit with an
|
||||||
# OPNsense reservation when OPNsense-as-code lands; backstopped by wt0.
|
# OPNsense reservation when OPNsense-as-code lands; backstopped by wt0.
|
||||||
|
|
|
||||||
|
|
@ -51,3 +51,9 @@ base__mesh_manage: true
|
||||||
base__mesh_management_url: "https://netbird.askari.wingu.me"
|
base__mesh_management_url: "https://netbird.askari.wingu.me"
|
||||||
base__mesh_setup_key: "{{ vault.netbird.setup_key }}"
|
base__mesh_setup_key: "{{ vault.netbird.setup_key }}"
|
||||||
base__mesh_version: "0.72.4" # match the coordinator; exact apt pin confirmed on-host at deploy
|
base__mesh_version: "0.72.4" # match the coordinator; exact apt pin confirmed on-host at deploy
|
||||||
|
|
||||||
|
# DNS-resilience (ADR-016 availability / accepted-risk R8): when set to the coordinator's
|
||||||
|
# stable IP, pin the coordinator FQDN (derived from base__mesh_management_url) in /etc/hosts
|
||||||
|
# so a managed mesh host survives a local-DNS hiccup (the 2026-06-18 incident class). Empty
|
||||||
|
# = no pin. The coordinator host itself (askari/offsite_hosts) is exempt — leave it empty.
|
||||||
|
base__mesh_coordinator_pin: ""
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@
|
||||||
base__mesh_enabled: true
|
base__mesh_enabled: true
|
||||||
base__mesh_manage: false
|
base__mesh_manage: false
|
||||||
base__mesh_setup_key: "dummy-molecule-key"
|
base__mesh_setup_key: "dummy-molecule-key"
|
||||||
|
base__mesh_coordinator_pin: "203.0.113.9" # fixture IP (TEST-NET-3); pins FQDN from base__mesh_management_url
|
||||||
base__ssh_listen_mesh_only: true
|
base__ssh_listen_mesh_only: true
|
||||||
base__ssh_listen_addr: "100.99.0.1" # fixture mesh IP (no wt0 in the container)
|
base__ssh_listen_addr: "100.99.0.1" # fixture mesh IP (no wt0 in the container)
|
||||||
firewall_zones:
|
firewall_zones:
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,11 @@ platforms:
|
||||||
# prepare.yml. This entry ensures the value exists in the container's netns at startup.
|
# prepare.yml. This entry ensures the value exists in the container's netns at startup.
|
||||||
sysctls:
|
sysctls:
|
||||||
net.ipv4.ip_nonlocal_bind: "0"
|
net.ipv4.ip_nonlocal_bind: "0"
|
||||||
|
# ubongo's /etc/resolv.conf points to the NetBird mesh DNS (100.99.x.x), which Docker
|
||||||
|
# containers can't reach (no wt0). Override to a public resolver so prepare.yml apt tasks
|
||||||
|
# can update the cache and install packages.
|
||||||
|
dns_servers:
|
||||||
|
- 8.8.8.8
|
||||||
|
|
||||||
provisioner:
|
provisioner:
|
||||||
name: ansible
|
name: ansible
|
||||||
|
|
|
||||||
|
|
@ -103,3 +103,14 @@
|
||||||
- _nb.rc != 0
|
- _nb.rc != 0
|
||||||
fail_msg: "netbird must not be installed when base__mesh_manage is false"
|
fail_msg: "netbird must not be installed when base__mesh_manage is false"
|
||||||
success_msg: "mesh concern is a clean no-op under manage=false"
|
success_msg: "mesh concern is a clean no-op under manage=false"
|
||||||
|
|
||||||
|
# Fetch /etc/hosts from the target; the content is base64-decoded in the assertion below.
- name: Read /etc/hosts (coordinator pin)
  ansible.builtin.slurp:
    src: /etc/hosts
  register: _etchosts

# Verify that the coordinator pin rendered as its own "<ip> <fqdn>" line.
- name: Assert the coordinator FQDN is pinned to the fixture IP (DNS-resilience / R8)
  ansible.builtin.assert:
    that:
      # Whole-line membership rather than a substring test over the file: a substring
      # test would also pass when the text is only part of a longer line (an IP with
      # extra leading digits, or a hostname with an extra suffix).
      - "'203.0.113.9 netbird.askari.wingu.me' in (_etchosts.content | b64decode).splitlines()"
    fail_msg: "base__mesh_coordinator_pin did not render the /etc/hosts coordinator pin"
    success_msg: "coordinator FQDN pinned in /etc/hosts"
|
||||||
|
|
|
||||||
|
|
@ -64,3 +64,17 @@
|
||||||
- "'Management: Connected' not in (_netbird_status.stdout | default(''))"
|
- "'Management: Connected' not in (_netbird_status.stdout | default(''))"
|
||||||
no_log: true # setup key is on the argv
|
no_log: true # setup key is on the argv
|
||||||
tags: [mesh]
|
tags: [mesh]
|
||||||
|
|
||||||
|
# Write "<coordinator IP> <coordinator FQDN>" to /etc/hosts so the coordinator name still
# resolves when local DNS is unavailable.
#   base__mesh_coordinator_pin  IP to pin; empty or null means "no pin" (task is skipped).
#   base__mesh_management_url   URL the FQDN is derived from (scheme, port and path stripped).
- name: Pin the NetBird coordinator FQDN in /etc/hosts (DNS-resilience, ADR-016 availability / R8)
  ansible.builtin.lineinfile:
    path: /etc/hosts
    # Match an existing entry ending in the FQDN so a changed IP replaces it instead of
    # appending a second line.
    regexp: '\s{{ _coordinator_fqdn | regex_escape }}$'
    line: "{{ base__mesh_coordinator_pin }} {{ _coordinator_fqdn }}"
    state: present
    unsafe_writes: true  # /etc/hosts is a bind mount in Docker; atomic rename is impossible
  vars:
    _coordinator_fqdn: "{{ base__mesh_management_url | regex_replace('^https?://', '') | regex_replace('[:/].*', '') }}"
  when:
    - base__mesh_enabled | bool
    # default('', true) maps a null pin (bare `key:` in an override) to "" so the length
    # test skips the task instead of raising on None.
    - base__mesh_coordinator_pin | default('', true) | length > 0
    # Kept last so it is only evaluated once the mesh is enabled and a pin is set. An
    # empty FQDN would reduce the regexp to '\s$' (matches any line with trailing
    # whitespace) and write a bare IP with no hostname.
    - _coordinator_fqdn | length > 0
  tags: [mesh]
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue