From f83d68d7a0e713ff68d44b08fdcd5d7b5dd65e27 Mon Sep 17 00:00:00 2001
From: sjat <sjat@ziethen.dk>
Date: Sat, 20 Jun 2026 11:22:40 +0200
Subject: [PATCH 1/5] feat(base): pin the NetBird coordinator FQDN in
 /etc/hosts (mesh DNS-resilience)

Adds base__mesh_coordinator_pin (default empty = no-op). When it is set and base__mesh_enabled
is true, a lineinfile task writes "<ip> <fqdn>" to /etc/hosts so a managed mesh host survives a
local-DNS hiccup (the 2026-06-18 incident class). The FQDN is derived from
base__mesh_management_url via regex_replace (no community.general). The task is gated on
base__mesh_enabled | bool and a non-empty pin; the coordinator host (askari/offsite_hosts) stays
exempt by keeping the empty default. Production pin wired for ubongo, pointing at askari's WAN
IP (77.42.120.136). Molecule dns_servers fix included (Docker/NetBird DNS incompatibility).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 inventories/production/group_vars/control/vars.yml |  6 ++++++
 roles/base/defaults/main.yml                       |  6 ++++++
 roles/base/molecule/default/converge.yml           |  1 +
 roles/base/molecule/default/molecule.yml           |  5 +++++
 roles/base/molecule/default/verify.yml             | 11 +++++++++++
 roles/base/tasks/mesh.yml                          | 14 ++++++++++++++
 6 files changed, 43 insertions(+)
diff --git a/inventories/production/group_vars/control/vars.yml b/inventories/production/group_vars/control/vars.yml
index edc7a1d..018c6e6 100644
--- a/inventories/production/group_vars/control/vars.yml
+++ b/inventories/production/group_vars/control/vars.yml
@@ -27,6 +27,12 @@ base__mesh_enabled: true
 # ssh-from-control self-path (base__firewall_control_addr, group_vars/all = 10.20.10.151), or
 # mamba on the LAN. Break-glass: the physical console. (base__firewall_apply defaults true.)
 base__firewall_input_only: true
+
+# DNS-resilience (ADR-016 availability / R8): pin the coordinator FQDN to askari's stable WAN
+# IP in /etc/hosts so a local-DNS hiccup (the 2026-06-18 incident class) can't strand ubongo's
+# mesh. askari (offsite_hosts) is exempt — it reaches the coordinator locally.
+base__mesh_coordinator_pin: "77.42.120.136"
+
 base__firewall_admin_addrs:
   - "10.20.10.50"   # mamba over the LAN (NetBird off). Raw DHCP lease — revisit with an
                     # OPNsense reservation when OPNsense-as-code lands; backstopped by wt0.
diff --git a/roles/base/defaults/main.yml b/roles/base/defaults/main.yml
index 774e911..cb259e7 100644
--- a/roles/base/defaults/main.yml
+++ b/roles/base/defaults/main.yml
@@ -51,3 +51,9 @@ base__mesh_manage: true
 base__mesh_management_url: "https://netbird.askari.wingu.me"
 base__mesh_setup_key: "{{ vault.netbird.setup_key }}"
 base__mesh_version: "0.72.4"   # match the coordinator; exact apt pin confirmed on-host at deploy
+
+# DNS-resilience (ADR-016 availability / accepted-risk R8): when set to the coordinator's
+# stable IP, pin the coordinator FQDN (derived from base__mesh_management_url) in /etc/hosts
+# so a managed mesh host survives a local-DNS hiccup (the 2026-06-18 incident class). Empty
+# = no pin. The coordinator host itself (askari/offsite_hosts) is exempt — leave it empty.
+base__mesh_coordinator_pin: ""
diff --git a/roles/base/molecule/default/converge.yml b/roles/base/molecule/default/converge.yml
index 6ab934d..395918d 100644
--- a/roles/base/molecule/default/converge.yml
+++ b/roles/base/molecule/default/converge.yml
@@ -13,6 +13,7 @@
     base__mesh_enabled: true
     base__mesh_manage: false
     base__mesh_setup_key: "dummy-molecule-key"
+    base__mesh_coordinator_pin: "203.0.113.9"   # fixture IP (TEST-NET-3); pins FQDN from base__mesh_management_url
     base__ssh_listen_mesh_only: true
     base__ssh_listen_addr: "100.99.0.1"   # fixture mesh IP (no wt0 in the container)
     firewall_zones:
diff --git a/roles/base/molecule/default/molecule.yml b/roles/base/molecule/default/molecule.yml
index 4c17329..a9fb4ca 100644
--- a/roles/base/molecule/default/molecule.yml
+++ b/roles/base/molecule/default/molecule.yml
@@ -24,6 +24,11 @@ platforms:
     # prepare.yml. This entry ensures the value exists in the container's netns at startup.
     sysctls:
       net.ipv4.ip_nonlocal_bind: "0"
+    # ubongo's /etc/resolv.conf points to the NetBird mesh DNS (100.99.x.x), which Docker
+    # containers can't reach (no wt0). Override to a public resolver so prepare.yml apt tasks
+    # can update the cache and install packages.
+    dns_servers:
+      - 8.8.8.8
 
 provisioner:
   name: ansible
diff --git a/roles/base/molecule/default/verify.yml b/roles/base/molecule/default/verify.yml
index d3a7741..51962fb 100644
--- a/roles/base/molecule/default/verify.yml
+++ b/roles/base/molecule/default/verify.yml
@@ -103,3 +103,14 @@
           - _nb.rc != 0
         fail_msg: "netbird must not be installed when base__mesh_manage is false"
         success_msg: "mesh concern is a clean no-op under manage=false"
+
+    - name: Read /etc/hosts (coordinator pin)
+      ansible.builtin.slurp:
+        src: /etc/hosts
+      register: _etchosts
+    - name: Assert the coordinator FQDN is pinned to the fixture IP (DNS-resilience / R8)
+      ansible.builtin.assert:
+        that:
+          - "'203.0.113.9 netbird.askari.wingu.me' in (_etchosts.content | b64decode)"
+        fail_msg: "base__mesh_coordinator_pin did not render the /etc/hosts coordinator pin"
+        success_msg: "coordinator FQDN pinned in /etc/hosts"
diff --git a/roles/base/tasks/mesh.yml b/roles/base/tasks/mesh.yml
index 5226043..83786ef 100644
--- a/roles/base/tasks/mesh.yml
+++ b/roles/base/tasks/mesh.yml
@@ -64,3 +64,17 @@
     - "'Management: Connected' not in (_netbird_status.stdout | default(''))"
   no_log: true   # setup key is on the argv
   tags: [mesh]
+
+- name: Pin the NetBird coordinator FQDN in /etc/hosts (DNS-resilience, ADR-016 availability / R8)
+  ansible.builtin.lineinfile:
+    path: /etc/hosts
+    regexp: '\s{{ _coordinator_fqdn | regex_escape }}$'
+    line: "{{ base__mesh_coordinator_pin }} {{ _coordinator_fqdn }}"
+    state: present
+    unsafe_writes: true   # /etc/hosts is a bind mount in Docker; atomic rename is impossible
+  vars:
+    _coordinator_fqdn: "{{ base__mesh_management_url | regex_replace('^https?://', '') | regex_replace('[:/].*', '') }}"
+  when:
+    - base__mesh_enabled | bool
+    - base__mesh_coordinator_pin | length > 0
+  tags: [mesh]

From 74e54b359b69ee4e05f521dfb044b7b21774a708 Mon Sep 17 00:00:00 2001
From: sjat <sjat@ziethen.dk>
Date: Sat, 20 Jun 2026 11:31:15 +0200
Subject: [PATCH 2/5] fix(base): document /etc/hosts unsafe_writes as a
 fallback for the Docker Molecule env

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 roles/base/tasks/mesh.yml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/roles/base/tasks/mesh.yml b/roles/base/tasks/mesh.yml
index 83786ef..17a8b1e 100644
--- a/roles/base/tasks/mesh.yml
+++ b/roles/base/tasks/mesh.yml
@@ -71,7 +71,9 @@
     regexp: '\s{{ _coordinator_fqdn | regex_escape }}$'
     line: "{{ base__mesh_coordinator_pin }} {{ _coordinator_fqdn }}"
     state: present
-    unsafe_writes: true   # /etc/hosts is a bind mount in Docker; atomic rename is impossible
+    # /etc/hosts is bind-mounted in the Docker Molecule container (atomic rename → EBUSY);
+    # this is a fallback only — production VMs still write atomically.
+    unsafe_writes: true
   vars:
     _coordinator_fqdn: "{{ base__mesh_management_url | regex_replace('^https?://', '') | regex_replace('[:/].*', '') }}"
   when:

From c09b7fe6a53a7d08fa26a043dc1296029e018bad Mon Sep 17 00:00:00 2001
From: sjat <sjat@ziethen.dk>
Date: Sat, 20 Jun 2026 11:34:21 +0200
Subject: [PATCH 3/5] docs(security): accept the single-coordinator mesh SPOF
 (R8) + ADR-016 availability amendment

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 STATUS.md                       |  2 +-
 docs/ROADMAP.md                 | 11 ++++++++---
 docs/decisions/016-mesh-vpn.md  | 32 ++++++++++++++++++++++++++++++++
 docs/security/accepted-risks.md |  3 ++-
 4 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/STATUS.md b/STATUS.md
index 6f747a6..9a7d631 100644
--- a/STATUS.md
+++ b/STATUS.md
@@ -39,7 +39,7 @@ _Last reviewed: 2026-06-19._
 
 | Thing | State |
 |---|---|
-| `roles/base/` | **Partially built.** Concerns built: `firewall` (nftables: catalog-driven default-deny + east-west allowlist + auto-rollback apply; ADR-020) and **`hardening`** (M3: sshd drop-in key-only + `PermitRootLogin no`, fail2ban sshd jail 5/1h; ADR-002) — both pytest/Molecule-tested. The **`hardening`** concern is **applied to askari** (`make deploy PLAYBOOK=site LIMIT=askari TAGS=hardening`). The `firewall` concern is **applied to ubongo** (mesh-hardening 2/3, 2026-06-19) **and askari** (mesh-hardening redesign, 2026-06-20) — both INPUT-only default-deny via the `base__firewall_input_only` knob (input default-deny + `wt0`/ssh-from-control/`base__firewall_admin_addrs` allow-list; forward left `accept` so Docker/libvirt-NAT survive), both **live reboot-validated**. On a Docker host (askari) base's `flush ruleset` wipes Docker's nat, so the cutover follows the firewall apply with a `restart docker` to rebuild it (FRICTION). Not built: auditd, packages, users (Phase 2 / TODO 15). |
+| `roles/base/` | **Partially built.** Concerns built: `firewall` (nftables: catalog-driven default-deny + east-west allowlist + auto-rollback apply; ADR-020) and **`hardening`** (M3: sshd drop-in key-only + `PermitRootLogin no`, fail2ban sshd jail 5/1h; ADR-002) — both pytest/Molecule-tested. The **`hardening`** concern is **applied to askari** (`make deploy PLAYBOOK=site LIMIT=askari TAGS=hardening`). The `firewall` concern is **applied to ubongo** (mesh-hardening 2/3, 2026-06-19) **and askari** (mesh-hardening redesign, 2026-06-20) — both INPUT-only default-deny via the `base__firewall_input_only` knob (input default-deny + `wt0`/ssh-from-control/`base__firewall_admin_addrs` allow-list; forward left `accept` so Docker/libvirt-NAT survive), both **live reboot-validated**. On a Docker host (askari) base's `flush ruleset` wipes Docker's nat, so the cutover follows the firewall apply with a `restart docker` to rebuild it (FRICTION). Not built: auditd, packages, users (Phase 2 / TODO 15). The `mesh` concern also pins the coordinator FQDN in `/etc/hosts` (`base__mesh_coordinator_pin`, set for ubongo) so a local-DNS hiccup can't strand the mesh; the single-coordinator SPOF is an accepted availability risk (R8, ADR-016 availability amendment). |
 | `inventories/*/hosts.yml` | Structured stubs with empty host maps (`hosts: {}`); regenerated by `make tf-inventory` once Terraform has hosts |
 | `inventories/production/group_vars/{docker_hosts,proxmox_hosts}/` | Empty dirs |
 
diff --git a/docs/ROADMAP.md b/docs/ROADMAP.md
index 7b3da5c..e9e2280 100644
--- a/docs/ROADMAP.md
+++ b/docs/ROADMAP.md
@@ -215,8 +215,13 @@ coordinator; a real reboot recovered unattended. Remaining mesh-hardening sub-pr
 1. ~~`ubongo` nftables default-deny + `ssh-from-control`~~ → **DONE (2026-06-19).**
 2. ~~**redesign** `askari`'s SSH → `wt0`~~ → **DONE (2026-06-20)** — boot-race, coordinator-bootstrap
    chicken-egg, and Docker-nat-flush all resolved + live reboot-validated.
-3. **askari relay-SPOF reduction** (next) — `ubongo→askari` is currently `Relayed` through askari's own
-   relay, so askari is a single point of failure for relayed mesh traffic; reduce it (second relay / direct P2P).
-4. tighten the NetBird ACL **off Allow-All** to scoped policies (open mechanism question — no headless API path).
+3. ~~**askari relay-SPOF reduction**~~ → **DONE (2026-06-20)** — assessed + **accepted** as a
+   documented availability risk (R8 + ADR-016 availability amendment): the blast radius is
+   narrow (LAN/intra-cluster/local traffic never touch askari), so no P2P / second relay /
+   second coordinator was warranted. Hardened the one real gap — a managed-host coordinator-FQDN
+   DNS pin (`base__mesh_coordinator_pin`). The coordinator off-site backup gap is handed to ADR-022.
+4. **NetBird ACL off Allow-All** to scoped policies (open mechanism question — no headless API path).
+5. **ADR-022 backup kickoff** — off-site backup of the `netbird_coordinator` store (named in R8 /
+   BACKUP.md) as the first slice of the backup role (restic + the `fisi` pull node).
 
 **Then** the Procurement gate (`/capacity-review` → buy Proxmox hardware) opens Phase 2.
diff --git a/docs/decisions/016-mesh-vpn.md b/docs/decisions/016-mesh-vpn.md
index 5aaef1e..ec7361f 100644
--- a/docs/decisions/016-mesh-vpn.md
+++ b/docs/decisions/016-mesh-vpn.md
@@ -125,6 +125,38 @@ allocated for it.
 - Implementation is pending: the role tasks land only once the unbuilt `base` role and
   service-role machinery exist (Status).
 
+## Availability — an `askari` outage (amendment 2026-06-20)
+
+The coordinator is deliberately **single** (one off-site host). Recorded here so its
+availability envelope is explicit; accepted as **R8** (`docs/security/accepted-risks.md`).
+
+The mesh is **not** a default gateway — `wt0` routes only the overlay CIDR (`100.99.0.0/16`);
+normal traffic uses the host's default route. So an `askari` outage has a **narrow blast
+radius**:
+
+| Traffic | `askari` down |
+|---|---|
+| LAN device → LAN service (direct / via reverse proxy) | unaffected |
+| node ↔ node over LAN IPs (cluster) | unaffected |
+| node ↔ node same-LAN over mesh IPs | unaffected (direct P2P) |
+| **road-warrior → `ubongo` (remote, relayed)** | **breaks** |
+| mesh control plane (new enrol / ACL change / re-handshake) | pauses |
+
+Only remote (off-LAN) mesh access to peers is lost, and only while the client is off-LAN
+**and** `askari` is down at the same time. On-LAN access to `ubongo` never depends on the
+mesh (Recovery & operations, above).
+
+**Recovery:** rebuild the coordinator (`/setup` + re-enrol peers, M5) or restore from backup
+once ADR-022 lands; the `netbird_coordinator` store backup is the **next sub-project** (its
+gap is named in R8 and `BACKUP.md`). Client/road-warrior break-glass (reliable resolvers +
+the coordinator-FQDN `/etc/hosts` pin) is in `docs/runbooks/netbird-client.md`; managed mesh
+hosts get the same pin via `base__mesh_coordinator_pin`.
+
+**Not pursued** (deliberately, given the narrow blast radius): direct P2P (punctures the
+default-deny posture; only helps established sessions), a second relay (needs another public
+host / reintroduces the home public surface), a second coordinator (unsupported by
+self-hosted NetBird; against this ADR).
+
 ## Related
 
 ADR-007 (network — amended), ADR-015 (control host), ADR-002 (security),
diff --git a/docs/security/accepted-risks.md b/docs/security/accepted-risks.md
index 0801afa..3e409e9 100644
--- a/docs/security/accepted-risks.md
+++ b/docs/security/accepted-risks.md
@@ -20,8 +20,9 @@ revisit (trigger).
 | R5 | **No disk encryption on `ubongo`** — the control node's SSD (SanDisk X600 256 GB, TCG-Opal-capable but Opal unused) is unencrypted at rest, so it holds recovery-critical secrets in plaintext: the Ansible Vault password's `rbw` local cache and (future) Terraform state. Physical theft of the box would expose them | `ubongo` is always-on in a physically controlled location; compensating controls are a **BIOS supervisor password** and **disabled external/USB + PXE boot** (an attacker cannot trivially boot another OS to read the disk), and the offline-recoverable design means the irreducible root secret (Vaultwarden master password) is never stored on the box anyway. Full-disk encryption was weighed against the always-on/unattended-reboot requirement (LUKS+TPM auto-unlock or passphrase) and deferred for simplicity at this trust level | `ubongo` is relocated to a less-trusted physical location; the box starts holding additional high-value secrets; or a reinstall onto LUKS (TPM-sealed) is undertaken |
 | R6 | **`le-prod-wildcard` integration runs** — when `CERTS=le-prod-wildcard` is passed to `make test-integration`, the production Gandi PAT (`vault.gandi.pat`) is passed to an ephemeral local test VM via the var overlay, and transient `_acme-challenge` TXT records are written into the real `wingu.me` DNS zone to satisfy the Let's Encrypt DNS-01 challenge. A compromised or long-lived test VM could exfiltrate the PAT; the real zone is briefly (seconds) modified | Scope is **on-demand only** — `le-staging` is the default cert tier (`CERTS=internal` for incident repro); `le-prod-wildcard` is an explicit opt-in. Compensating controls: the VM is ephemeral and destroyed on success; it sits on an isolated libvirt NAT network (no LAN/mesh access); TXT records are auto-removed by Caddy immediately after validation; the PAT is not persisted inside the VM after the run. ADR-025 documents the cert-tier design and the three isolation invariants | The PAT is exfiltrated from a test VM; the `wingu.me` zone shows unexpected records; a `CERTS=le-prod-wildcard` run must be audited or the tier must be revoked |
 | R7 | **`claude` AI-worker has `NOPASSWD:ALL` sudo on `ubongo`** — the automated AI-worker account can execute any command as root on the control node without a password prompt. A compromised or misbehaving agent session could make arbitrary root-level changes to ubongo | The account is **password-locked** (no interactive `claude` login; `NOPASSWD` sudo is the account's only escalation path, so there is no "su to claude + sudo" attack). `auditd` + Loki attribution (ADR-018) logs every `sudo` invocation with the originating user. The drop-in (`/etc/sudoers.d/claude-ai-worker`) is repo-managed via `base__ai_worker_user` — revocable in one commit + one deploy. Single-operator homelab; all changes in git; off-machine backups (ADR-022). Full rationale: ADR-015 amendment (2026-06-18) + ADR-021 §Sudo model. | The AI-worker executes a destructive action that cannot be rolled back via git; the account key is compromised; the threat model shifts toward targeted remote attackers |
+| R8 | **Single off-site mesh coordinator is an availability SPOF for remote mesh access** — `askari` hosts the only NetBird management/signal/relay (ADR-016); while askari is down, every *relayed* peer (all of `ubongo`'s, by the deliberate default-deny posture) loses remote mesh reachability and the control plane pauses. The `netbird_coordinator` store also has **no off-site backup yet** (BACKUP.md), so an askari loss loses mesh control-plane state until rebuilt | Inherent to ADR-016's deliberate single off-site coordinator (sovereignty; survives a homelab outage). **Narrow blast radius:** the mesh is not a gateway (`wt0` routes only `100.99.0.0/16`) — LAN, intra-cluster, and local-service traffic are unaffected; only remote/off-LAN mesh access breaks, and only when off-LAN *and* askari is down at once. askari is a reliable always-on VPS; mitigations: client + managed-host coordinator-FQDN DNS pin (`base__mesh_coordinator_pin`; runbook), documented `/setup` rebuild | askari proves unreliable; the cluster grows to depend on the mesh for intra-node traffic; remote mesh access becomes business-critical; or the ADR-022 backup role lands (closes the state-loss half) |
 
-_Last reviewed: 2026-06-18. The prior gaps (full CIS hardening, SELinux/AppArmor,
+_Last reviewed: 2026-06-20. The prior gaps (full CIS hardening, SELinux/AppArmor,
 IDS) were re-challenged and **adopted rather than accepted**: CIS Debian L1+L2 + CIS
 Docker, AppArmor (enforce), AIDE file-integrity, and Suricata network IDS are now
 part of the security strategy (ADR-002). See STATUS.md / `docs/TODO.md` for build

From a483f4e55c08498579fbb497e54d39337ec4ef93 Mon Sep 17 00:00:00 2001
From: sjat <sjat@ziethen.dk>
Date: Sat, 20 Jun 2026 11:41:19 +0200
Subject: [PATCH 4/5] fix: address whole-branch review (anchor pin regexp,
 ADR-016 backup note, verify comment)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/decisions/016-mesh-vpn.md         | 2 +-
 roles/base/molecule/default/verify.yml | 2 +-
 roles/base/tasks/mesh.yml              | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/decisions/016-mesh-vpn.md b/docs/decisions/016-mesh-vpn.md
index ec7361f..02c2a70 100644
--- a/docs/decisions/016-mesh-vpn.md
+++ b/docs/decisions/016-mesh-vpn.md
@@ -116,7 +116,7 @@ allocated for it.
   address as a mesh-independent secondary path, so a mesh/coordinator outage never
   blocks on-LAN SSH and Ansible stays off the mesh (Security; Recovery & operations).
 - The mesh survives a homelab outage because the coordinator is off-site on `askari`,
-  with its management datastore backed up encrypted off `askari` and peers keeping
+  with its management datastore **intended** to be backed up encrypted off `askari` (not yet built — see the Availability amendment / R8) and peers keeping
   last-known config through a brief coordinator outage (Recovery & operations).
 - Choosing NetBird over plain OPNsense WireGuard, Tailscale, Tailscale+Headscale, an
   on-cluster coordinator, a `ubongo` subnet router, and a standalone IdP gains
diff --git a/roles/base/molecule/default/verify.yml b/roles/base/molecule/default/verify.yml
index 51962fb..d3069aa 100644
--- a/roles/base/molecule/default/verify.yml
+++ b/roles/base/molecule/default/verify.yml
@@ -111,6 +111,6 @@
     - name: Assert the coordinator FQDN is pinned to the fixture IP (DNS-resilience / R8)
       ansible.builtin.assert:
         that:
-          - "'203.0.113.9 netbird.askari.wingu.me' in (_etchosts.content | b64decode)"
+          - "'203.0.113.9 netbird.askari.wingu.me' in (_etchosts.content | b64decode)"  # slurp content is always base64
         fail_msg: "base__mesh_coordinator_pin did not render the /etc/hosts coordinator pin"
         success_msg: "coordinator FQDN pinned in /etc/hosts"
diff --git a/roles/base/tasks/mesh.yml b/roles/base/tasks/mesh.yml
index 17a8b1e..01badf0 100644
--- a/roles/base/tasks/mesh.yml
+++ b/roles/base/tasks/mesh.yml
@@ -68,7 +68,7 @@
 - name: Pin the NetBird coordinator FQDN in /etc/hosts (DNS-resilience, ADR-016 availability / R8)
   ansible.builtin.lineinfile:
     path: /etc/hosts
-    regexp: '\s{{ _coordinator_fqdn | regex_escape }}$'
+    regexp: '^[^#\s]\S*\s+{{ _coordinator_fqdn | regex_escape }}\s*$'   # [^#\s]: a commented-out entry is never taken for the live pin
     line: "{{ base__mesh_coordinator_pin }} {{ _coordinator_fqdn }}"
     state: present
     # /etc/hosts is bind-mounted in the Docker Molecule container (atomic rename → EBUSY);

From 0030b45bbdbfe6270e1e74c66111d1407eaa9990 Mon Sep 17 00:00:00 2001
From: sjat <sjat@ziethen.dk>
Date: Sat, 20 Jun 2026 11:42:49 +0200
Subject: [PATCH 5/5] docs(adr-016): soften the second stale off-site-backup
 claim (R8 consistency)

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 docs/decisions/016-mesh-vpn.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/decisions/016-mesh-vpn.md b/docs/decisions/016-mesh-vpn.md
index 02c2a70..0a9de27 100644
--- a/docs/decisions/016-mesh-vpn.md
+++ b/docs/decisions/016-mesh-vpn.md
@@ -85,8 +85,9 @@ allocated for it.
 - **Bootstrap order:** stand up the coordinator on `askari` → enroll `ubongo` →
   `base` enrolls the fleet.
 - **Coordinator survival:** off-site on `askari` ⇒ mesh survives a homelab outage.
-  NetBird's management datastore is backed up encrypted off `askari` (synced to
-  `ubongo`/`mamba`); peers keep last-known config through a brief coordinator outage.
+  NetBird's management datastore is **intended** to be backed up encrypted off `askari`
+  (synced to `ubongo`/`mamba`; not yet built — see the Availability amendment / R8); peers
+  keep last-known config through a brief coordinator outage.
 - **`askari` is Ansible-managed:** its own inventory group `offsite_hosts` — provisioned
   as **Terraform IaC** (`hetznercloud/hcloud`), managed independently of the Proxmox
   cluster (its own provider + local state). Ansible configuration: `base` role, plus a