From ab1b0678abc70acc255eda22cb45555a83797c6e Mon Sep 17 00:00:00 2001
From: sjat
Date: Mon, 15 Jun 2026 17:39:56 +0200
Subject: [PATCH] feat(netbird): coordinator service role (combined server +
 dashboard, v0.72.4)

First real service role. NetBird v0.72.4 self-hosted control plane: single
netbirdio/netbird-server:0.72.4 (management + signal + relay + STUN + embedded
Dex) plus netbirdio/dashboard:v2.39.0, both on the shared boma Docker network
so the M4a Caddy fronts them. Renders docker-compose.yml + config.yaml
(secrets from vault.netbird.*, no_log) + dashboard.env. STUN 3478/udp
host-exposed; everything else via the proxy. netbird_coordinator__manage gates
the compose-up for Molecule.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
 roles/netbird_coordinator/README.md         | 64 +++++++++++++++++++
 roles/netbird_coordinator/defaults/main.yml | 15 +++++
 roles/netbird_coordinator/handlers/main.yml |  7 ++
 roles/netbird_coordinator/meta/main.yml     | 15 +++++
 .../molecule/default/converge.yml           | 16 +++++
 .../molecule/default/molecule.yml           | 31 +++++++++
 .../molecule/default/verify.yml             | 32 ++++++++++
 roles/netbird_coordinator/tasks/main.yml    | 38 +++++++++++
 .../templates/config.yaml.j2                | 28 ++++++++
 .../templates/dashboard.env.j2              | 13 ++++
 .../templates/docker-compose.yml.j2         | 33 ++++++++++
 11 files changed, 292 insertions(+)
 create mode 100644 roles/netbird_coordinator/README.md
 create mode 100644 roles/netbird_coordinator/defaults/main.yml
 create mode 100644 roles/netbird_coordinator/handlers/main.yml
 create mode 100644 roles/netbird_coordinator/meta/main.yml
 create mode 100644 roles/netbird_coordinator/molecule/default/converge.yml
 create mode 100644 roles/netbird_coordinator/molecule/default/molecule.yml
 create mode 100644 roles/netbird_coordinator/molecule/default/verify.yml
 create mode 100644 roles/netbird_coordinator/tasks/main.yml
 create mode 100644 roles/netbird_coordinator/templates/config.yaml.j2
 create mode 100644 roles/netbird_coordinator/templates/dashboard.env.j2
 create mode 100644 roles/netbird_coordinator/templates/docker-compose.yml.j2

diff --git a/roles/netbird_coordinator/README.md b/roles/netbird_coordinator/README.md
new file mode 100644
index 0000000..b2e915d
--- /dev/null
+++ b/roles/netbird_coordinator/README.md
@@ -0,0 +1,64 @@
+# netbird_coordinator
+
+Self-hosted **NetBird coordinator** — the mesh-VPN control plane (ADR-016). Runs on
+`askari` (the off-site Hetzner host) and is the rendezvous point every NetBird peer
+talks to. Deployed via Docker Compose (ADR-004), behind the Caddy reverse proxy.
+
+## Architecture — combined server
+
+NetBird's self-hosted stack is now a **single combined server image** plus a separate
+dashboard UI — there is no longer a separate signal / relay / coturn / dex container,
+and no `turnserver.conf` / `management.json` / `openid-configuration.json`.
+
+| Container | Image | Role |
+|---|---|---|
+| `netbird-server` | `netbirdio/netbird-server` | Management API + Signal + Relay + STUN + embedded Dex IdP (`/oauth2`), all in one process. Config at `/etc/netbird/config.yaml`. State in the `netbird_data` volume (SQLite). |
+| `netbird-dashboard` | `netbirdio/dashboard` | Web UI. Configured purely by environment (`dashboard.env`); a public PKCE OIDC client, so its client secret is intentionally empty. |
+
+Both containers join the **existing external `boma` Docker network** (created by the
+`reverse_proxy` role's compose) so Caddy reaches them by container name. The only
+host-exposed port is **`3478/udp` (STUN)**; HTTP/gRPC/WS traffic enters via Caddy over
+the boma network, not via host ports.
+
+### Reverse-proxy routing (added separately — M4a Caddy)
+
+This role does **not** add the Caddy route. The route is a separate task and must
+front several upstreams on `netbird-server` over the boma network, all to the same
+backend:
+
+- HTTP — `/api/*`, `/oauth2/*`
+- Native gRPC (h2c) — `/signalexchange.SignalExchange/*`, `/management.ManagementService/*`
+- WebSocket — `/relay*`, `/ws-proxy/*` (upgrade + long timeouts)
+- Dashboard catch-all — `/*` → `netbird-dashboard`
+
+gRPC needs HTTP/2 (h2c) upstream support; WS/gRPC need extended timeouts.
+
+## Variables — `netbird_coordinator__*`
+
+| Variable | Default | Description |
+|---|---|---|
+| `netbird_coordinator__server_image` | `netbirdio/netbird-server:0.72.4` | Combined server image (pinned; never `latest`) |
+| `netbird_coordinator__dashboard_image` | `netbirdio/dashboard:v2.39.0` | Dashboard image (versioned independently of the server) |
+| `netbird_coordinator__base_dir` | `/opt/services/netbird` | Working directory for the Compose project |
+| `netbird_coordinator__domain` | `netbird.askari.wingu.me` | Public hostname; feeds `exposedAddress`, the OIDC issuer, redirect URIs, and the dashboard endpoints |
+| `netbird_coordinator__trusted_proxies` | `["172.16.0.0/12"]` | Source ranges NetBird trusts `X-Forwarded-*` from (`server.reverseProxy.trustedHTTPProxies`). Must cover Caddy's source IP on the boma network — verify the actual bridge subnet at deploy |
+| `netbird_coordinator__manage` | `true` | Set `false` in Molecule to render templates without a Docker daemon |
+
+Production overrides live in `inventories/production/group_vars/`.
+
+## Secrets
+
+Two secrets come from the vault and are rendered into the host-side `config.yaml`
+(mode 0640, `no_log`); they never touch the work tree or the dashboard:
+
+- `vault.netbird.auth_secret` — `server.authSecret`
+- `vault.netbird.datastore_key` — `server.store.encryptionKey` (base64; keep the padding)
+
+The dashboard's OIDC client is a public PKCE client, so `AUTH_CLIENT_SECRET` is
+intentionally empty — `dashboard.env` carries no secrets.
+
+## `netbird_coordinator__manage` toggle
+
+Docker operations (`docker compose up`, the restart handler) are gated on
+`netbird_coordinator__manage | bool`. Molecule sets it `false` so the role can be tested
+(template rendering, directory creation) without a Docker daemon.
diff --git a/roles/netbird_coordinator/defaults/main.yml b/roles/netbird_coordinator/defaults/main.yml
new file mode 100644
index 0000000..57cf1c2
--- /dev/null
+++ b/roles/netbird_coordinator/defaults/main.yml
@@ -0,0 +1,15 @@
+---
+# NetBird coordinator (self-hosted mesh-VPN control plane, ADR-016).
+# Combined server image (Management + Signal + Relay + STUN) plus the dashboard UI.
+netbird_coordinator__server_image: "netbirdio/netbird-server:0.72.4"
+netbird_coordinator__dashboard_image: "netbirdio/dashboard:v2.39.0"
+netbird_coordinator__base_dir: /opt/services/netbird
+netbird_coordinator__domain: netbird.askari.wingu.me
+
+# Source IP ranges Caddy fronts NetBird from, rendered into config.yaml
+# server.reverseProxy.trustedHTTPProxies. NetBird trusts X-Forwarded-* only from
+# these. MUST cover the Caddy container's source IP on the boma Docker network —
+# verify the actual bridge subnet at deploy (docker network inspect boma) and tighten.
+netbird_coordinator__trusted_proxies: ["172.16.0.0/12"]
+
+netbird_coordinator__manage: true  # set false in Molecule to render without Docker
diff --git a/roles/netbird_coordinator/handlers/main.yml b/roles/netbird_coordinator/handlers/main.yml
new file mode 100644
index 0000000..c4731ac
--- /dev/null
+++ b/roles/netbird_coordinator/handlers/main.yml
@@ -0,0 +1,7 @@
+---
+- name: Restart netbird
+  listen: restart netbird
+  community.docker.docker_compose_v2:
+    project_src: "{{ netbird_coordinator__base_dir }}"
+    state: restarted
+  when: netbird_coordinator__manage | bool
diff --git a/roles/netbird_coordinator/meta/main.yml b/roles/netbird_coordinator/meta/main.yml
new file mode 100644
index 0000000..6d220e1
--- /dev/null
+++ b/roles/netbird_coordinator/meta/main.yml
@@ -0,0 +1,15 @@
+---
+galaxy_info:
+  author: sjat
+  description: >-
+    Self-hosted NetBird coordinator (ADR-016): combined server image
+    (Management + Signal + Relay + STUN) plus dashboard UI, run on askari via
+    Docker Compose behind the Caddy reverse proxy. Pinned images; secrets from
+    vault.
+  license: MIT
+  min_ansible_version: "2.17"
+  platforms:
+    - name: Debian
+      versions:
+        - trixie
+dependencies: []
diff --git a/roles/netbird_coordinator/molecule/default/converge.yml b/roles/netbird_coordinator/molecule/default/converge.yml
new file mode 100644
index 0000000..1a522b7
--- /dev/null
+++ b/roles/netbird_coordinator/molecule/default/converge.yml
@@ -0,0 +1,16 @@
+---
+- name: Converge
+  hosts: all
+  gather_facts: true
+
+  vars:
+    netbird_coordinator__manage: false
+    # Dummy vault values so the secret-bearing templates render under Molecule.
+    # (datastore_key must be valid base64 — NetBird decodes it on the real host.)
+    vault:
+      netbird:
+        auth_secret: "dummy-auth-secret"
+        datastore_key: "ZHVtbXlrZXk="
+
+  roles:
+    - role: netbird_coordinator
diff --git a/roles/netbird_coordinator/molecule/default/molecule.yml b/roles/netbird_coordinator/molecule/default/molecule.yml
new file mode 100644
index 0000000..b23d8da
--- /dev/null
+++ b/roles/netbird_coordinator/molecule/default/molecule.yml
@@ -0,0 +1,31 @@
+---
+dependency:
+  name: galaxy
+  options:
+    requirements-file: ../../requirements.yml
+
+driver:
+  name: docker
+
+platforms:
+  - name: instance
+    # Project-owned image built from .docker/molecule-debian13/Dockerfile
+    # and hosted in the Forgejo container registry.
+    # Build/push with: make molecule-image / make molecule-image-push
+    image: forgejo.nyumbani.baobab.band/sjat/molecule-debian13:latest
+    pre_build_image: true
+    privileged: true  # required for systemd
+    cgroupns_mode: host
+    volumes:
+      - /sys/fs/cgroup:/sys/fs/cgroup:rw
+    command: /lib/systemd/systemd
+
+provisioner:
+  name: ansible
+  inventory:
+    host_vars:
+      instance:
+        ansible_user: root
+
+verifier:
+  name: ansible
diff --git a/roles/netbird_coordinator/molecule/default/verify.yml b/roles/netbird_coordinator/molecule/default/verify.yml
new file mode 100644
index 0000000..0d657f3
--- /dev/null
+++ b/roles/netbird_coordinator/molecule/default/verify.yml
@@ -0,0 +1,32 @@
+---
+- name: Verify
+  hosts: all
+  gather_facts: false
+
+  tasks:
+    - name: Slurp the rendered config.yaml
+      ansible.builtin.slurp:
+        src: /opt/services/netbird/config.yaml
+      register: _config
+    - name: Assert config.yaml has expected content
+      ansible.builtin.assert:
+        that:
+          - _config.content | b64decode | length > 0
+          - "'netbird.askari.wingu.me' in (_config.content | b64decode)"
+          - "'engine: \"sqlite\"' in (_config.content | b64decode)"
+          - "'/oauth2' in (_config.content | b64decode)"
+        fail_msg: "config.yaml is missing expected content"
+        success_msg: "config.yaml rendered correctly"
+
+    - name: Slurp the rendered docker-compose.yml
+      ansible.builtin.slurp:
+        src: /opt/services/netbird/docker-compose.yml
+      register: _compose
+    - name: Assert compose pins both image tags
+      ansible.builtin.assert:
+        that:
+          - _compose.content | b64decode | length > 0
+          - "'0.72.4' in (_compose.content | b64decode)"
+          - "'v2.39.0' in (_compose.content | b64decode)"
+        fail_msg: "docker-compose.yml is missing pinned image tags"
+        success_msg: "docker-compose.yml pins both image tags"
diff --git a/roles/netbird_coordinator/tasks/main.yml b/roles/netbird_coordinator/tasks/main.yml
new file mode 100644
index 0000000..1cd8e3d
--- /dev/null
+++ b/roles/netbird_coordinator/tasks/main.yml
@@ -0,0 +1,38 @@
+---
+- name: Ensure the service directory exists
+  ansible.builtin.file:
+    path: "{{ netbird_coordinator__base_dir }}"
+    state: directory
+    mode: "0750"
+  tags: [config]
+
+- name: Render the combined server config
+  ansible.builtin.template:
+    src: config.yaml.j2
+    dest: "{{ netbird_coordinator__base_dir }}/config.yaml"
+    mode: "0640"
+  no_log: true  # holds authSecret + datastore encryption key
+  notify: restart netbird
+  tags: [config]
+
+- name: Render the dashboard env file
+  ansible.builtin.template:
+    src: dashboard.env.j2
+    dest: "{{ netbird_coordinator__base_dir }}/dashboard.env"
+    mode: "0644"
+  notify: restart netbird
+  tags: [config]
+
+- name: Render the compose file
+  ansible.builtin.template:
+    src: docker-compose.yml.j2
+    dest: "{{ netbird_coordinator__base_dir }}/docker-compose.yml"
+    mode: "0644"
+  tags: [config]
+
+- name: Bring the NetBird coordinator up
+  community.docker.docker_compose_v2:
+    project_src: "{{ netbird_coordinator__base_dir }}"
+    state: present
+  when: netbird_coordinator__manage | bool
+  tags: [deploy]
diff --git a/roles/netbird_coordinator/templates/config.yaml.j2 b/roles/netbird_coordinator/templates/config.yaml.j2
new file mode 100644
index 0000000..5b8e7b1
--- /dev/null
+++ b/roles/netbird_coordinator/templates/config.yaml.j2
@@ -0,0 +1,28 @@
+# {{ ansible_managed }}
+server:
+  listenAddress: ":80"
+  exposedAddress: "https://{{ netbird_coordinator__domain }}:443"
+  stunPorts: [3478]
+  metricsPort: 9090
+  healthcheckAddress: ":9000"
+  logLevel: "info"
+  logFile: "console"
+  # Secrets are rendered with to_json so a quote or backslash in the value is
+  # escaped instead of corrupting the YAML (same for store.encryptionKey below).
+  authSecret: {{ vault.netbird.auth_secret | to_json }}
+  dataDir: "/var/lib/netbird"
+  auth:
+    issuer: "https://{{ netbird_coordinator__domain }}/oauth2"
+    signKeyRefreshEnabled: true
+    dashboardRedirectURIs:
+      - "https://{{ netbird_coordinator__domain }}/nb-auth"
+      - "https://{{ netbird_coordinator__domain }}/nb-silent-auth"
+    cliRedirectURIs:
+      - "http://localhost:53000/"
+  reverseProxy:
+    # to_json (not a loop) so an empty override renders [] not YAML null —
+    # null would mean "trust no proxy" and silently break X-Forwarded-* from Caddy.
+    trustedHTTPProxies: {{ netbird_coordinator__trusted_proxies | to_json }}
+  store:
+    engine: "sqlite"
+    encryptionKey: {{ vault.netbird.datastore_key | to_json }}
diff --git a/roles/netbird_coordinator/templates/dashboard.env.j2 b/roles/netbird_coordinator/templates/dashboard.env.j2
new file mode 100644
index 0000000..8921cb9
--- /dev/null
+++ b/roles/netbird_coordinator/templates/dashboard.env.j2
@@ -0,0 +1,13 @@
+# {{ ansible_managed }}
+NETBIRD_MGMT_API_ENDPOINT=https://{{ netbird_coordinator__domain }}
+NETBIRD_MGMT_GRPC_API_ENDPOINT=https://{{ netbird_coordinator__domain }}
+AUTH_AUDIENCE=netbird-dashboard
+AUTH_CLIENT_ID=netbird-dashboard
+AUTH_CLIENT_SECRET=
+AUTH_AUTHORITY=https://{{ netbird_coordinator__domain }}/oauth2
+USE_AUTH0=false
+AUTH_SUPPORTED_SCOPES=openid profile email groups
+AUTH_REDIRECT_URI=/nb-auth
+AUTH_SILENT_REDIRECT_URI=/nb-silent-auth
+NGINX_SSL_PORT=443
+LETSENCRYPT_DOMAIN=none
diff --git a/roles/netbird_coordinator/templates/docker-compose.yml.j2 b/roles/netbird_coordinator/templates/docker-compose.yml.j2
new file mode 100644
index 0000000..f84c922
--- /dev/null
+++ b/roles/netbird_coordinator/templates/docker-compose.yml.j2
@@ -0,0 +1,33 @@
+# {{ ansible_managed }}
+services:
+  dashboard:
+    image: "{{ netbird_coordinator__dashboard_image }}"
+    container_name: netbird-dashboard
+    restart: unless-stopped
+    env_file: [./dashboard.env]
+    networks: [boma]
+    # Cap json logs — Docker's default driver never rotates. Interim until ADR-018
+    # (Alloy log shipping) lands; consider back-porting this to reverse_proxy too.
+    logging:
+      driver: json-file
+      options: {max-size: "500m", max-file: "2"}
+  netbird-server:
+    image: "{{ netbird_coordinator__server_image }}"
+    container_name: netbird-server
+    restart: unless-stopped
+    command: ["--config", "/etc/netbird/config.yaml"]
+    ports:
+      - "3478:3478/udp"
+    volumes:
+      - netbird_data:/var/lib/netbird
+      - ./config.yaml:/etc/netbird/config.yaml:ro
+    networks: [boma]
+    logging:
+      driver: json-file
+      options: {max-size: "500m", max-file: "2"}
+volumes:
+  netbird_data:
+networks:
+  boma:
+    external: true
+    name: boma