fix(integration): fix VM boot: hostname, netplan, known_hosts handling

Three fixes found during askari_inputonly integration-test development:

1. Hostname sanitization: cloud-init rejects underscores in local-hostname
   and then silently skips writing the network config, so the VM never gets
   a DHCP lease. Sanitize with name.replace("_", "-") for the meta-data
   hostname only; disk paths and virsh domain names keep the original name
   (underscores are valid there).

2. Netplan explicit interface: `match.name: en*` under a named key produces
   a .network file that networkd loads but never DHCPs. Use the explicit
   interface name enp1s0 (all virtio NICs in these KVM VMs are named enp1s0)
   plus renderer: networkd to bypass the bug.

3. ansible_ssh_common_args in the generated hosts.yml: integration VMs
   reuse IPs (different VMs get the same 192.168.150.x lease), and the
   StrictHostKeyChecking=accept-new setting from ansible.cfg rejects changed
   host keys. Add StrictHostKeyChecking=no + UserKnownHostsFile=/dev/null
   per host to the generated inventory so stale known_hosts entries never
   block the apply step.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
sjat 2026-06-19 19:15:07 +02:00
parent 1042f161b6
commit 8ca42c389c

View file

@ -106,6 +106,12 @@ def render_run_hosts(name, ip, ansible_user, groups):
f" {name}:",
f" ansible_host: {ip}",
f" ansible_user: {ansible_user}",
# Integration VMs reuse IPs; bypass host-key caching so stale
# known_hosts entries (from prior runs with a different VM at
# the same IP) do not block the Ansible apply step.
" ansible_ssh_common_args: >-",
" -o StrictHostKeyChecking=no",
" -o UserKnownHostsFile=/dev/null",
]
return "\n".join(lines) + "\n"
@ -188,15 +194,22 @@ def up(host, name=None, mem_mib=DEFAULT_MEM_MIB, vcpus=DEFAULT_VCPUS):
overlay = CACHE_DIR / f"{name}.qcow2"
sh(["qemu-img", "create", "-f", "qcow2", "-F", "qcow2", "-b", str(img), str(overlay)])
(RUN_DIR / "user-data").write_text(render_user_data(_ssh_pubkey(), "ansible"))
(RUN_DIR / "meta-data").write_text(render_meta_data(f"iid-{name}", name))
# cloud-init rejects underscores in local-hostname (causes init-local to skip
# writing the network config → VM never gets a DHCP lease). Sanitize VM name
# for use as hostname without affecting disk paths or virsh domain names.
(RUN_DIR / "meta-data").write_text(render_meta_data(f"iid-{name}", name.replace("_", "-")))
seed = CACHE_DIR / f"{name}-seed.img"
# Force DHCP on the VM NIC — don't rely on the genericcloud image's network fallback.
# Use explicit renderer + interface name to avoid a netplan 1.1.2 generation issue:
# `match.name: en*` with a named key (e.g. `primary`) produces a .network file that
# networkd loads but never DHCPs (no DHCP4 messages, just IPv6LL). Using the real
# interface name `enp1s0` (all virtio NICs in these KVM VMs are named enp1s0) and
# `renderer: networkd` bypasses the bug.
(RUN_DIR / "network-config").write_text(
'version: 2\n'
'renderer: networkd\n'
'ethernets:\n'
' primary:\n'
' match:\n'
' name: "en*"\n'
' enp1s0:\n'
' dhcp4: true\n')
sh(["cloud-localds", "--network-config", str(RUN_DIR / "network-config"),
str(seed), str(RUN_DIR / "user-data"), str(RUN_DIR / "meta-data")])