bpg / terraform-provider-proxmox

Terraform Provider for Proxmox
https://registry.terraform.io/providers/bpg/proxmox
Mozilla Public License 2.0
874 stars 139 forks source link

kernel panic when resizing disk from debian 12 genericcloud template #1639

Closed mmelyp closed 3 days ago

mmelyp commented 6 days ago

Hi everyone,

I have a problem which has been bothering me for a few days and I really don't know how to solve it.

I have been using this Terraform provider for some time and was able to successfully create new Debian 12 genericcloud-based VMs (cloud-init).

For the past few days, I get a "Kernel panic - not syncing: Attempted to kill init!" after the disk is resized and the VM is started. When I apply the Terraform manifest, this is what happens on the Proxmox side:

UPID:tango:0003C2AF:00828CC1:6738AA72:download:debian-12-generic-amd64-latest.img:root@pam!terraform: 6738AA7F OK
UPID:tango:0003C2FF:00829204:6738AA7F:qmcreate:100:root@pam!terraform: 6738AA80 OK
UPID:tango:0003C3A3:008296D0:6738AA8B:resize:100:root@pam!terraform: 6738AA8C OK
UPID:tango:0003C3C2:00829778:6738AA8D:qmstart:100:root@pam!terraform: 6738AA8E OK

This used to work without problems.

This is my terraform file:

# Proxmox VM whose first disk is imported from the cloud image selected by
# var.vm["cloud_image"] and whose cloud-init user data comes from the matching
# snippet file. This is the manifest that produced the kernel panic reported
# in this thread.
resource "proxmox_virtual_environment_vm" "prox-vm" {

  description = local.proxmox_vm_description
  name        = var.vm["hostname"]
  node_name   = var.node_name
  on_boot     = var.vm["start"]
  started     = true

  # q35 machine type with OVMF (UEFI) firmware and a single-queue
  # virtio-scsi controller.
  # NOTE(review): `started`, these three settings and the efi_disk block are
  # what the reporter removed to get a booting VM — confirm which one matters.
  machine       = "q35"
  scsi_hardware = "virtio-scsi-single"
  bios          = "ovmf"

  # EFI variable store required by the OVMF firmware selected above.
  efi_disk {
    datastore_id = "local-lvm"
    file_format  = "raw"
    type         = "4m"
  }

  # Guest OS type "l26" (shown as `ostype: l26` in `qm config`).
  operating_system {
    type = "l26"
  }

  # QEMU guest agent settings; `trim` shows up as fstrim_cloned_disks=1 in
  # `qm config`.
  # https://registry.terraform.io/providers/bpg/proxmox/latest/docs/resources/virtual_environment_vm#agent
  agent {
    enabled = true
    trim    = true
  }

  # Serial console is disabled in this manifest. The reporter states that
  # enabling this block avoids the kernel panic.
  /*   serial_device {
    device = "socket"
  } */

  # CPU topology and model come from var.vm; `hotplugged` is fed from the
  # "vcpus" key (shown as `vcpus` in `qm config`). No extra CPU flags are set.
  cpu {
    type       = var.vm["cpu_type"]
    cores      = var.vm["cores"]
    sockets    = var.vm["sockets"]
    hotplugged = var.vm["vcpus"]
    flags      = []
  }

  memory {
    dedicated = var.vm["memory"]
  }

  # Primary disk: imported from the downloaded cloud image (file_id) and sized
  # to var.vm["disk"]["size"]; the resize task in the Proxmox log belongs to
  # this step.
  disk {
    datastore_id = var.vm["disk"]["storage"]
    interface    = var.vm["disk"]["type"]
    size         = var.vm["disk"]["size"]
    iothread     = true
    discard      = "ignore"
    cache        = "none"
    aio          = "native"
    replicate    = false
    file_id      = lookup(local._cloud_images, var.vm["cloud_image"])
    file_format  = "raw"
  }

  # One virtio NIC per entry in var.vm["networks"].
  dynamic "network_device" {
    for_each = var.vm["networks"]

    content {
      model    = "virtio"
      bridge   = network_device.value["bridge"]
      vlan_id  = network_device.value["tag"]
      firewall = network_device.value["firewall"]
    }
  }

  # Cloud-init drive attached as ide0. User data is the custom snippet for the
  # selected image; the static IPv4 settings are taken from the first network
  # entry only.
  initialization {
    interface         = "ide0"
    user_data_file_id = lookup(local._possible_cloud_configs, var.vm["cloud_image"])

    ip_config {
      ipv4 {
        address = var.vm["networks"][0]["ip"]
        gateway = var.vm["networks"][0]["gateway"]
      }
    }

  }

  # Ignore changes to the network devices: the MAC address is generated on
  # every apply, which would otherwise make Terraform think the VM has to be
  # rebuilt on every apply.
  lifecycle {
    ignore_changes = [
      network_device,
    ]
  }

  # Additional disks, one per entry in var.vm["extra_disks"], placed on
  # local-lvm. Declared after the primary disk block above.
  dynamic "disk" {
    for_each = var.vm["extra_disks"]

    content {
      interface    = "scsi"
      datastore_id = "local-lvm"
      size         = disk.value["size"]
    }
  }
}

If i enable:

 serial_device {
    device = "socket"
 } 

then I do not get the kernel panic, but the cloud-init configuration is somehow not applied.

Here is how the cloud image and cloud-config file generation looks like:

###############################
# Supported cloud init images #
###############################

locals {
  # Image file ID per supported `cloud_image` key. Every download resource is
  # gated by `count` (0 or 1), so an entry is null unless its image is the one
  # selected.
  _cloud_images = {
    debian_12_base     = one(proxmox_virtual_environment_download_file.debian_12_generic_image[*].id)
    debian_12_k8s_base = one(proxmox_virtual_environment_download_file.debian_12_generic_k8s_image[*].id)
    ubuntu_2404_base   = one(proxmox_virtual_environment_download_file.ubuntu_noble_server[*].id)
    ubuntu_2204_base   = one(proxmox_virtual_environment_download_file.ubuntu_jammy_server[*].id)
  }

  # Cloud-init snippet file ID per supported `cloud_image` key, with the same
  # null-when-not-selected behaviour as above.
  _possible_cloud_configs = {
    debian_12_base     = one(proxmox_virtual_environment_file.debian_base_cloud_config[*].id)
    debian_12_k8s_base = one(proxmox_virtual_environment_file.debian_k8s_base_cloud_config[*].id)
    ubuntu_2404_base   = one(proxmox_virtual_environment_file.ubuntu_base_cloud_config[*].id)
    ubuntu_2204_base   = one(proxmox_virtual_environment_file.ubuntu2204_base_cloud_config[*].id)
  }
}

# Debian 12 (bookworm) genericcloud image.
# Created only when the selected cloud image is "debian_12_base".
resource "proxmox_virtual_environment_download_file" "debian_12_generic_image" {
  count = var.vm.cloud_image == "debian_12_base" ? 1 : 0

  # Upstream qcow2 image, stored on the node under an .img file name.
  url       = "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2"
  file_name = "debian-12-generic-amd64-latest.img"

  # Optional integrity pin, currently disabled:
  # checksum           = "6cc752d71b390c7fea64b0b598225914a7f4adacd4a33fa366187fac01094648628e0681a109ae9320b9a79aba2832f33395fa13154dad636465b7d9cdbed599"
  # checksum_algorithm = "sha512"

  # Target location on the Proxmox node.
  content_type = "iso"
  datastore_id = "local"
  node_name    = var.node_name

  overwrite           = false
  overwrite_unmanaged = true
}

# Debian 12 (bookworm) genericcloud image for the Kubernetes base variant.
# Created only when the selected cloud image is "debian_12_k8s_base".
resource "proxmox_virtual_environment_download_file" "debian_12_generic_k8s_image" {
  count = var.vm.cloud_image == "debian_12_k8s_base" ? 1 : 0

  # Same upstream image and target file name as the plain Debian 12 variant.
  url       = "https://cloud.debian.org/images/cloud/bookworm/latest/debian-12-genericcloud-amd64.qcow2"
  file_name = "debian-12-generic-amd64-latest.img"

  # Optional integrity pin, currently disabled:
  # checksum           = "6cc752d71b390c7fea64b0b598225914a7f4adacd4a33fa366187fac01094648628e0681a109ae9320b9a79aba2832f33395fa13154dad636465b7d9cdbed599"
  # checksum_algorithm = "sha512"

  # Target location on the Proxmox node.
  content_type = "iso"
  datastore_id = "local"
  node_name    = var.node_name

  overwrite           = false
  overwrite_unmanaged = true
}

# Ubuntu 24.04 LTS (noble) minimal cloud image.
# Created only when the selected cloud image is "ubuntu_2404_base".
resource "proxmox_virtual_environment_download_file" "ubuntu_noble_server" {
  count = var.vm.cloud_image == "ubuntu_2404_base" ? 1 : 0

  url = "https://cloud-images.ubuntu.com/minimal/releases/noble/release/ubuntu-24.04-minimal-cloudimg-amd64.img"

  # Target location on the Proxmox node.
  node_name    = var.node_name
  datastore_id = "local"
  content_type = "iso"

  overwrite_unmanaged = true
  overwrite           = false
}

# Ubuntu 22.04 LTS (jammy) minimal cloud image.
# Created only when the selected cloud image is "ubuntu_2204_base".
resource "proxmox_virtual_environment_download_file" "ubuntu_jammy_server" {
  count = var.vm.cloud_image == "ubuntu_2204_base" ? 1 : 0

  url = "https://cloud-images.ubuntu.com/minimal/releases/jammy/release/ubuntu-22.04-minimal-cloudimg-amd64.img"

  # Target location on the Proxmox node.
  node_name    = var.node_name
  datastore_id = "local"
  content_type = "iso"

  overwrite_unmanaged = true
  overwrite           = false
}

# Cloud-init user data for the plain Debian 12 image, rendered from a template
# and uploaded as a snippet. Created only when "debian_12_base" is selected.
# The "Snippets" content type must be enabled on the target datastore in
# Proxmox before this is applied:
# https://github.com/bpg/terraform-provider-proxmox/blob/main/docs/guides/cloud-init.md
resource "proxmox_virtual_environment_file" "debian_base_cloud_config" {
  count = var.vm.cloud_image == "debian_12_base" ? 1 : 0

  node_name    = var.node_name
  datastore_id = "local"
  content_type = "snippets"

  source_raw {
    # One snippet per VM, named after its hostname.
    file_name = "${var.vm.hostname}-cloud-init-debian.yaml"

    data = templatefile("${path.module}/cloud-init/base-debian.yaml.tftpl", {
      hostname       = var.vm.hostname
      username       = var.user
      password       = var.user_password
      ssh_public_key = trimspace(var.user_ssh_pub_key)
    })
  }
}

# Cloud-init user data for the Debian 12 Kubernetes base image, rendered from
# the k8s template and uploaded as a snippet. Created only when
# "debian_12_k8s_base" is selected.
resource "proxmox_virtual_environment_file" "debian_k8s_base_cloud_config" {
  count = var.vm.cloud_image == "debian_12_k8s_base" ? 1 : 0

  node_name    = var.node_name
  datastore_id = "local"
  content_type = "snippets"

  source_raw {
    # One snippet per VM, named after its hostname.
    file_name = "${var.vm.hostname}-cloud-init-debian-k8s.yaml"

    data = templatefile("${path.module}/cloud-init/base-debian-k8s.yaml.tftpl", {
      # Identity
      hostname = var.vm.hostname
      fqdn     = "${var.vm.hostname}.${var.vm.domain}"

      # Static addressing, taken from the first network entry only.
      ip_address = var.vm.networks[0].ip
      gateway    = var.vm.networks[0].gateway

      # Login user
      username       = var.user
      password       = var.user_password
      ssh_public_key = trimspace(var.user_ssh_pub_key)
    })
  }
}

# Cloud-init user data for the Ubuntu 24.04 image, rendered from the Ubuntu
# template and uploaded as a snippet. Created only when "ubuntu_2404_base" is
# selected.
# The "Snippets" content type must be enabled on the target datastore in
# Proxmox before this is applied:
# https://github.com/bpg/terraform-provider-proxmox/blob/main/docs/guides/cloud-init.md
resource "proxmox_virtual_environment_file" "ubuntu_base_cloud_config" {
  count = var.vm.cloud_image == "ubuntu_2404_base" ? 1 : 0

  node_name    = var.node_name
  datastore_id = "local"
  content_type = "snippets"

  source_raw {
    # One snippet per VM, named after its hostname.
    file_name = "${var.vm.hostname}-cloud-init-ubuntu.yaml"

    data = templatefile("${path.module}/cloud-init/base-ubuntu.yaml.tftpl", {
      hostname       = var.vm.hostname
      username       = var.user
      password       = var.user_password
      ssh_public_key = trimspace(var.user_ssh_pub_key)
    })
  }
}

# Cloud-init user data for the Ubuntu 22.04 image. Uses the same template and
# snippet file name as the Ubuntu 24.04 variant; created only when
# "ubuntu_2204_base" is selected.
resource "proxmox_virtual_environment_file" "ubuntu2204_base_cloud_config" {
  count = var.vm.cloud_image == "ubuntu_2204_base" ? 1 : 0

  node_name    = var.node_name
  datastore_id = "local"
  content_type = "snippets"

  source_raw {
    # One snippet per VM, named after its hostname.
    file_name = "${var.vm.hostname}-cloud-init-ubuntu.yaml"

    data = templatefile("${path.module}/cloud-init/base-ubuntu.yaml.tftpl", {
      hostname       = var.vm.hostname
      username       = var.user
      password       = var.user_password
      ssh_public_key = trimspace(var.user_ssh_pub_key)
    })
  }
}

and the cloud-init config template ("${path.module}/cloud-init/base-debian-k8s.yaml.tftpl"):

#cloud-config
# Cloud-init user data for a Debian 12 Kubernetes base node. Rendered through
# Terraform's templatefile(); the placeholders (username, password,
# ssh_public_key, hostname, fqdn, ip_address, gateway) are filled in there.

# Keep the image's default user and add an admin user with passwordless sudo.
# NOTE(review): cloud-init's `passwd` normally expects a password hash —
# confirm that the value passed in is hashed.
users:
  - default
  - name: ${username}
    passwd: ${password}
    lock_passwd: false
    groups: [adm, sudo]
    shell: /bin/bash
    ssh_authorized_keys:
      - ${ssh_public_key}
    sudo: ALL=(ALL) NOPASSWD:ALL

# Host identity, package refresh/upgrade on first boot, and timezone.
hostname: ${hostname}
fqdn: ${fqdn}
prefer_fqdn_over_hostname: true
create_hostname_file: true
package_update: true
package_upgrade: true
timezone: Europe/Berlin

# Files written to the guest. The netplan file is applied later from runcmd.
write_files:
  # Static IPv4 configuration for eth0 with DHCP disabled.
  - path: /etc/netplan/50-cloud-init.yaml
    content: |
      network:
        version: 2
        ethernets:
          eth0:
            link-local: [ ]
            dhcp4: false
            dhcp6: false
            addresses:
              - ${ip_address}
            gateway4: ${gateway}
            nameservers:
              search: [mmely.de]
              addresses: [10.20.0.247]

  # SSH hardening: no root login and no password-based authentication, so the
  # user password set above is not usable over SSH.
  - path: /etc/ssh/sshd_config.d/01-harden-ssh.conf
    content: |
      PermitRootLogin no
      PasswordAuthentication no
      ChallengeResponseAuthentication no
      UsePAM no
  # Disable IPv6 on all interfaces.
  - path: /etc/sysctl.d/10-disable-ipv6.conf
    permissions: 0644
    owner: root
    content: |
      net.ipv6.conf.all.disable_ipv6 = 1
      net.ipv6.conf.default.disable_ipv6 = 1
      net.ipv6.conf.lo.disable_ipv6 = 1
  # Kernel modules to load at boot.
  - path: /etc/modules-load.d/k8s.conf
    content: |
      br_netfilter
      overlay
  # Replaces /etc/sysctl.conf with forwarding, redirect, martian-logging,
  # panic and bridge-netfilter settings.
  - path: /etc/sysctl.conf
    content: |
      net.ipv4.ip_forward=1
      net.ipv4.conf.all.send_redirects=0
      net.ipv4.conf.default.send_redirects=0
      net.ipv4.conf.default.accept_source_route=0
      net.ipv4.conf.all.accept_redirects=0
      net.ipv4.conf.default.accept_redirects=0
      net.ipv4.conf.all.log_martians=1
      net.ipv4.conf.default.log_martians=1
      net.ipv4.conf.all.rp_filter=1
      net.ipv4.conf.default.rp_filter=1
      net.ipv6.conf.all.accept_ra=0
      net.ipv6.conf.default.accept_ra=0
      net.ipv6.conf.all.accept_redirects=0
      net.ipv6.conf.default.accept_redirects=0
      kernel.keys.root_maxbytes=25000000
      kernel.keys.root_maxkeys=1000000
      kernel.panic=10
      kernel.panic_on_oops=1
      vm.overcommit_memory=1
      vm.panic_on_oom=0
      net.ipv4.ip_local_reserved_ports=30000-32767
      net.bridge.bridge-nf-call-iptables=1
      net.bridge.bridge-nf-call-arptables=1

# Packages installed on first boot.
packages:
  - qemu-guest-agent
  - net-tools
  - vim
  - apt-transport-https
  - ca-certificates
  - locales-all
  - curl
  - gpg
  - open-iscsi
  - nfs-common
  - jq

# Reboot once cloud-init has finished.
power_state:
  delay: now
  mode: reboot
  message: Rebooting after cloud-init completion
  condition: true

# First-boot commands, run in the order listed.
runcmd:
  # Re-apply sysctl settings and the netplan file written above.
  - systemctl restart systemd-sysctl
  - netplan apply
  # Install and start the QEMU guest agent (also listed under packages).
  - apt update
  - apt install -y qemu-guest-agent
  - systemctl enable qemu-guest-agent
  - systemctl start qemu-guest-agent
  - localectl set-locale LANG=en_US.UTF-8
  # Stop, disable and purge AppArmor, then delete its profile directory.
  - systemctl stop apparmor.service
  - systemctl disable apparmor.service
  - apt remove -y apparmor --purge
  - rm -fr /etc/apparmor.d/
  # Comment out swap entries in /etc/fstab and turn swap off now.
  - sed -i '/ swap / s/^\(.*\)$/#\1/g' /etc/fstab
  - swapoff -a

I have the feeling that the kernel panic happens because, for some reason, cloud-init cannot be applied and therefore cannot grow/resize the VM disk.

What is strange is that the VM knows about the cloud-init config:

root@tango:/var/lib/vz/snippets# qm config 100
acpi: 1
agent: enabled=1,fstrim_cloned_disks=1,type=virtio
balloon: 0
bios: ovmf
boot: order=virtio0;net0
cicustom: user=local:snippets/ktest-cp01-cloud-init-debian-k8s.yaml
cores: 2
cpu: cputype=host
cpuunits: 1024
description: Managed by Terraform
efidisk0: local-lvm:vm-100-disk-0,efitype=4m,pre-enrolled-keys=0,size=4M
ide0: local-lvm:vm-100-cloudinit,media=cdrom
ipconfig0: gw=10.20.0.254,ip=10.20.0.170/24
keyboard: en-us
machine: q35
memory: 2048
meta: creation-qemu=9.0.2,ctime=1731768502
name: ktest-cp01
net0: virtio=BC:24:11:88:2C:D8,bridge=vmbr0,firewall=0
numa: 0
onboot: 0
ostype: l26
protection: 0
scsihw: virtio-scsi-single
smbios1: uuid=7e66bf93-e236-44e3-b085-b537b4ed1a9c
sockets: 2
tablet: 1
template: 0
vcpus: 2
vga: memory=16,type=std
virtio0: local-lvm:vm-100-disk-1,aio=native,backup=1,cache=none,discard=ignore,iothread=1,replicate=0,size=20G
vmgenid: 7d7f668f-a6a9-4966-b5dd-52ce6be13947

and a dump of cloud-init:

root@tango:/var/lib/vz/snippets# qm cloudinit dump 100 user
#cloud-config
hostname: ktest-cp01
manage_etc_hosts: true
fqdn: ktest-cp01
chpasswd:
  expire: False
users:
  - default
package_upgrade: true
root@tango:/var/lib/vz/snippets# qm cloudinit dump 100 network
version: 1
config:
    - type: physical
      name: eth0
      mac_address: 'bc:24:11:88:2c:d8'
      subnets:
      - type: static
        address: '10.20.0.170'
        netmask: '255.255.255.0'
        gateway: '10.20.0.254'
    - type: nameserver
      address:
      - '10.20.0.247'
      search:
      - 'mmely.de'
root@tango:/var/lib/vz/snippets# qm cloudinit dump 100 meta
instance-id: 478eb074e14118b79f8e0cefdde216f7e5b63cef

After manually restarting the VM, it boots fine and no kernel panic is shown, but I cannot see the hostname, user, or any other cloud-init setting being applied:

image

My environment is:

Terraform: 1.9.8, bpg/proxmox: 0.66.3, Proxmox: 8.2.8

As I said, this used to work some time ago. Any ideas?

Thanks in advance ;)

mmelyp commented 3 days ago

I'm closing this issue. It is now working after removing

  started     = true

  machine       = "q35"
  scsi_hardware = "virtio-scsi-single"
  bios          = "ovmf"

  efi_disk {
    datastore_id = "local-lvm"
    file_format  = "raw"
    type         = "4m"
  }

and adding:

  serial_device {
    device = "socket"
  } 
bpg commented 20 hours ago

don't really know what's happened there 🤷🏼‍♂️