From 6e3869349998752ef6e6670daec65c32c0e3301f Mon Sep 17 00:00:00 2001
From: sjat
Date: Mon, 15 Jun 2026 06:57:47 +0200
Subject: [PATCH] feat(reverse_proxy): optional ACME DNS-01 via Gandi (wildcard / LAN-only)

Adds a per-instance DNS-01 mode to the Caddy role for mesh/LAN-only hosts that
cannot satisfy HTTP-01. Default behaviour (vanilla caddy:2 + HTTP-01, what
askari runs) is unchanged.

- reverse_proxy__acme_dns_provider: "" (HTTP-01) | "gandi" (DNS-01)
- reverse_proxy__image: override to the custom caddy-gandi image for DNS-01
- Caddyfile gains a global `acme_dns gandi {env.GANDI_BEARER_TOKEN}` block
- the PAT (vault.gandi.pat) renders into a host-only 0600 env file (no_log),
  loaded by compose only when DNS-01 is enabled
- the role asserts that the provider is "" or "gandi" (and that the PAT is set
  for "gandi") instead of silently falling back to HTTP-01
- the env file is removed again when DNS-01 is disabled, so the PAT does not
  linger on hosts that were switched back to HTTP-01

Verified: the custom image issues a real wildcard cert (*.dns01test.wingu.me)
end-to-end against LE staging via Gandi DNS-01; `caddy validate` accepts
`acme_dns gandi` on the custom image and rejects it on vanilla caddy:2.
Molecule (HTTP-01 default path) green.

Co-Authored-By: Claude Opus 4.8 (1M context)
---
Review notes (this section is ignored by `git am`):
- tasks/main.yml: added a fail-fast assert and a stale-token cleanup task.
- docker-compose.yml.j2: the templated image value is now quoted.
- The "Verified" paragraph above predates these review additions; re-run
  Molecule before merging.
- This patch was rebuilt from a whitespace-flattened copy: context-line
  indentation is reconstructed, and the `index` blob ids of tasks/main.yml and
  docker-compose.yml.j2 predate the review edits. Apply with
  `git apply --ignore-whitespace` if needed, then regenerate the patch with
  `git format-patch`.

 roles/reverse_proxy/README.md              | 37 +++++++++++++++----
 roles/reverse_proxy/defaults/main.yml      | 15 ++++++--
 roles/reverse_proxy/tasks/main.yml         | 33 +++++++++++++++++
 roles/reverse_proxy/templates/Caddyfile.j2 |  6 +++
 .../templates/docker-compose.yml.j2        |  6 ++-
 roles/reverse_proxy/templates/env.j2       |  5 +++
 6 files changed, 90 insertions(+), 12 deletions(-)
 create mode 100644 roles/reverse_proxy/templates/env.j2

diff --git a/roles/reverse_proxy/README.md b/roles/reverse_proxy/README.md
index 11e979e..81d32a8 100644
--- a/roles/reverse_proxy/README.md
+++ b/roles/reverse_proxy/README.md
@@ -1,18 +1,34 @@
 # reverse_proxy
 
 Boma's standard Caddy reverse proxy (ADR-024). Runs on `askari` (the off-site
-Hetzner host) and terminates TLS for all public-facing services via ACME HTTP-01.
-Uses the official `caddy:2` image — no custom build, no DNS plugin, no token required.
+Hetzner host) and terminates TLS for services. It supports **two ACME challenge
+types**, chosen per proxy instance by exposure:
+
+- **HTTP-01 (default)** — public hosts with an A-record (askari). Official `caddy:2`
+  image; no plugin, no token.
+- **DNS-01 via Gandi** — mesh/LAN-only hosts with **no** public A-record (the cluster),
+  where HTTP-01 is impossible. Needs the custom `caddy-gandi` image and the Gandi PAT.
 
 ## How TLS works
 
-Caddy obtains per-hostname certificates using the ACME HTTP-01 challenge. Port 80
-must be reachable from the internet for the challenge to succeed. Each `host` in
+**HTTP-01 (default).** Caddy obtains per-hostname certificates using the ACME HTTP-01
+challenge. Port 80 must be reachable from the internet. Each `host` in
 `reverse_proxy__routes` gets its own certificate automatically.
 
-> **DNS-01 (for mesh/LAN-only cluster services) is deferred to Phase 2.** The
-> `caddy-dns/gandi` plugin failed to issue certificates during M4a and needs
-> investigation before it can be used.
+**DNS-01 (Gandi).** Set `reverse_proxy__acme_dns_provider: gandi` and point
+`reverse_proxy__image` at the custom Caddy image (`make caddy-image`, built on ubongo
+and pushed to the Forgejo registry — see `.docker/caddy-gandi/`). Caddy then proves
+domain control by writing ACME TXT records through the Gandi LiveDNS API, so it can
+issue certs — **including wildcards** — for hosts that are never publicly reachable.
+The token (`vault.gandi.pat`) is injected as `GANDI_BEARER_TOKEN` via a host-only
+`env` file (mode 0600) and sent as a **Bearer PAT** (the legacy Apikey scheme is gone).
+
+> **Verified (2026-06-15):** the custom image issues a real wildcard cert
+> (`*.dns01test.wingu.me`) end-to-end against Let's Encrypt staging via Gandi DNS-01;
+> `caddy validate` accepts the `acme_dns gandi` directive on the custom image and
+> rejects it on vanilla `caddy:2` (`module not registered: dns.providers.gandi`). The
+> original M4a failure was version skew (a pre-Bearer `libdns/gandi` that sent the
+> deprecated Apikey header) plus building the image on a Hetzner IP (Go proxy 403).
 
 ## Route catalog — `reverse_proxy__routes`
 
@@ -46,6 +62,8 @@ Use `upstream` to proxy to a Docker service, or `respond` to return a static str
 | `reverse_proxy__base_dir` | `/opt/services/reverse_proxy` | Working directory for Compose project |
 | `reverse_proxy__acme_email` | `admin@example.test` | ACME registration email |
 | `reverse_proxy__routes` | `[]` | List of `{host, upstream}` or `{host, respond}` entries |
+| `reverse_proxy__image` | `caddy:2` | Container image. DNS-01 hosts override to the custom `caddy-gandi` image |
+| `reverse_proxy__acme_dns_provider` | `""` | `""` = HTTP-01; `"gandi"` = ACME DNS-01 via the Gandi PAT |
 | `reverse_proxy__manage` | `true` | Set `false` in Molecule to skip Docker tasks |
 
 Production overrides live in
@@ -59,4 +77,7 @@ creation) without a Docker daemon.
 
 ## Secrets
 
-None. HTTP-01 requires no credentials.
+- **HTTP-01 (default):** none — the challenge requires no credentials.
+- **DNS-01 (`reverse_proxy__acme_dns_provider: gandi`):** the Gandi PAT
+  (`vault.gandi.pat`, the same token `public_dns` uses). Rendered host-side into
+  `{{ reverse_proxy__base_dir }}/env` (mode 0600, `no_log`); never committed.
diff --git a/roles/reverse_proxy/defaults/main.yml b/roles/reverse_proxy/defaults/main.yml
index cb57d48..46249a9 100644
--- a/roles/reverse_proxy/defaults/main.yml
+++ b/roles/reverse_proxy/defaults/main.yml
@@ -1,10 +1,19 @@
 ---
-# Caddy reverse proxy (ADR-024). Vanilla Caddy; TLS via ACME HTTP-01 (public hosts).
+# Caddy reverse proxy (ADR-024).
 reverse_proxy__base_dir: /opt/services/reverse_proxy
 reverse_proxy__acme_email: admin@example.test
 reverse_proxy__routes: []  # each: {host: x, upstream: "svc:port"} OR {host: x, respond: "text"}
 reverse_proxy__manage: true  # set false in Molecule to render without Docker
 
+# ACME challenge type (ADR-024). Default is HTTP-01 with the vanilla upstream image —
+# correct for PUBLIC hosts with an A-record (askari). For mesh/LAN-only hosts with NO
+# public A-record (the cluster), HTTP-01 is impossible: set reverse_proxy__acme_dns_provider
+# to "gandi" AND point reverse_proxy__image at the custom Caddy+Gandi image to issue certs
+# (incl. wildcards) via Gandi DNS-01. The token is vault.gandi.pat (sent as a Bearer PAT;
+# the legacy Apikey scheme is gone). Build the image with `make caddy-image` (on ubongo).
+reverse_proxy__image: "caddy:2"       # DNS-01 hosts override -> the caddy-gandi registry image
+reverse_proxy__acme_dns_provider: ""  # "" = HTTP-01; "gandi" = ACME DNS-01 via Gandi PAT
+
 # access__*/backup__* are the ADR-021/022 CROSS-ROLE conventions — shared field names that
 # render ACCESS.md/BACKUP.md and drive /check-access · /check-backup. They intentionally do
 # NOT carry the reverse_proxy__ prefix, so each is marked `# noqa: var-naming[no-role-prefix]`
@@ -22,7 +31,7 @@ access__api: # noqa: var-naming[no-role-prefix]
   reason: "Caddy admin API bound to container localhost :2019; never exposed (ADR-020 catalog owns ports)"
 
 # Backup contract (ADR-022). Stateless: Caddy's /data holds only ACME account keys +
-# issued certs, which are re-requested automatically on restart via HTTP-01 (no manual
-# steps). Residual risk: Let's Encrypt rate limits on rapid repeated re-issuance.
+# issued certs, which are re-requested automatically on restart via ACME (HTTP-01 or
+# DNS-01; no manual steps). Residual risk: Let's Encrypt rate limits on rapid re-issuance.
 backup__service: reverse_proxy  # noqa: var-naming[no-role-prefix]
 backup__state: false  # noqa: var-naming[no-role-prefix]
diff --git a/roles/reverse_proxy/tasks/main.yml b/roles/reverse_proxy/tasks/main.yml
index fb53090..74c5d19 100644
--- a/roles/reverse_proxy/tasks/main.yml
+++ b/roles/reverse_proxy/tasks/main.yml
@@ -14,6 +14,39 @@
   notify: reload caddy
   tags: [config]
 
+# Fail fast: an unknown provider would otherwise silently fall back to HTTP-01, and
+# no_log on the render task below would hide a missing-token error.
+- name: Validate the ACME challenge settings
+  ansible.builtin.assert:
+    that:
+      - reverse_proxy__acme_dns_provider in ['', 'gandi']
+      - >-
+        reverse_proxy__acme_dns_provider != 'gandi'
+        or (vault.gandi.pat | default('') | length > 0)
+    fail_msg: >-
+      reverse_proxy__acme_dns_provider must be "" (HTTP-01) or "gandi" (DNS-01);
+      "gandi" additionally requires a non-empty vault.gandi.pat.
+    quiet: true
+  tags: [config]
+
+- name: Render the Gandi DNS-01 token env file
+  ansible.builtin.template:
+    src: env.j2
+    dest: "{{ reverse_proxy__base_dir }}/env"
+    mode: "0600"
+  no_log: true  # contains the Gandi PAT
+  when: reverse_proxy__acme_dns_provider == 'gandi'
+  notify: reload caddy
+  tags: [config]
+
+# Do not leave the PAT on disk once a host is switched back to HTTP-01.
+- name: Remove the stale Gandi token env file
+  ansible.builtin.file:
+    path: "{{ reverse_proxy__base_dir }}/env"
+    state: absent
+  when: reverse_proxy__acme_dns_provider != 'gandi'
+  tags: [config]
+
 - name: Render the compose file
   ansible.builtin.template:
     src: docker-compose.yml.j2
diff --git a/roles/reverse_proxy/templates/Caddyfile.j2 b/roles/reverse_proxy/templates/Caddyfile.j2
index ac68d6d..d84da68 100644
--- a/roles/reverse_proxy/templates/Caddyfile.j2
+++ b/roles/reverse_proxy/templates/Caddyfile.j2
@@ -1,6 +1,12 @@
 # {{ ansible_managed }}
 {
     email {{ reverse_proxy__acme_email }}
+{% if reverse_proxy__acme_dns_provider == 'gandi' %}
+    # ACME DNS-01 via Gandi (mesh/LAN-only hosts, incl. wildcard certs). Token is the
+    # Gandi PAT, injected from the env file as a Bearer token (ADR-024). Needs the custom
+    # caddy-gandi image — the upstream caddy:2 has no DNS provider modules.
+    acme_dns gandi {env.GANDI_BEARER_TOKEN}
+{% endif %}
 }
 {% for r in reverse_proxy__routes %}
 {{ r.host }} {
diff --git a/roles/reverse_proxy/templates/docker-compose.yml.j2 b/roles/reverse_proxy/templates/docker-compose.yml.j2
index 0310f23..740d187 100644
--- a/roles/reverse_proxy/templates/docker-compose.yml.j2
+++ b/roles/reverse_proxy/templates/docker-compose.yml.j2
@@ -1,12 +1,16 @@
 # {{ ansible_managed }}
 services:
   caddy:
-    image: caddy:2
+    image: "{{ reverse_proxy__image }}"
     container_name: caddy
     restart: unless-stopped
     ports:
       - "80:80"
       - "443:443"
+{% if reverse_proxy__acme_dns_provider == 'gandi' %}
+    env_file:
+      - ./env
+{% endif %}
     volumes:
       - ./Caddyfile:/etc/caddy/Caddyfile:ro
       - caddy_data:/data
diff --git a/roles/reverse_proxy/templates/env.j2 b/roles/reverse_proxy/templates/env.j2
new file mode 100644
index 0000000..2e8133d
--- /dev/null
+++ b/roles/reverse_proxy/templates/env.j2
@@ -0,0 +1,5 @@
+# {{ ansible_managed }}
+# Gandi Personal Access Token for ACME DNS-01 (rendered only when
+# reverse_proxy__acme_dns_provider == 'gandi'). Sent by caddy-dns/gandi as a Bearer
+# token to https://api.gandi.net/v5/livedns. Host-only, mode 0600 (ADR-024).
+GANDI_BEARER_TOKEN={{ vault.gandi.pat }}