hashicorp / terraform-provider-vsphere

Terraform Provider for VMware vSphere
https://registry.terraform.io/providers/hashicorp/vsphere/
Mozilla Public License 2.0

Failed to query disk type #1191

Open pryorda opened 4 years ago

pryorda commented 4 years ago

Terraform Version

0.12.28

vSphere Provider Version

1.22.0+

vSphere Version

6.5

Affected Resource(s)

vsphere_virtual_disk

Terraform Configuration Files

locals {
  host_basename = coalesce(
    var.hostname_override,
    "${var.environment}-${var.application}-${var.component}",
  )
  role = "${var.application}-${var.component}"
}

data "vsphere_datacenter" "datacenter" {
  name = var.vsphere_datacenter
}

data "vsphere_compute_cluster" "cluster" {
  name          = var.vsphere_cluster
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

data "vsphere_datastore" "datastore" {
  name          = var.vsphere_datastore
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

data "vsphere_resource_pool" "pool" {
  name          = "${var.vsphere_cluster}/Resources"
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

data "vsphere_network" "network" {
  name          = var.vsphere_network_label
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

data "vsphere_virtual_machine" "template" {
  name          = var.vsphere_template
  datacenter_id = data.vsphere_datacenter.datacenter.id
}

# DNS for instances
module "instance-dns" {
  source = "../instance_dns"

  instance_count    = var.instance_count
  service_provider  = "op"
  ttl               = var.vsphere_network_ipv4_addresses != "" ? 900 : 60
  region            = lower(var.region)
  create_regionless = false
  hostname          = local.host_basename
  ips               = vsphere_virtual_machine.instance.*.default_ip_address
  allow_overwrite   = var.allow_dns_overwrite
}

# Instance Resource
resource "vsphere_virtual_machine" "instance" {
  depends_on = [vsphere_virtual_disk.data_disk]

  count            = var.instance_count
  name             = "${lower(local.host_basename)}${count.index + 1}.${lower(var.region)}"
  resource_pool_id = data.vsphere_resource_pool.pool.id
  datastore_id     = data.vsphere_datastore.datastore.id
  guest_id         = data.vsphere_virtual_machine.template.guest_id
  scsi_type        = data.vsphere_virtual_machine.template.scsi_type

  folder = var.vsphere_folder_path

  num_cpus           = var.vsphere_vcpu
  memory             = var.vsphere_memory
  memory_reservation = var.vsphere_reserved_memory

  enable_disk_uuid           = true
  wait_for_guest_net_timeout = var.vsphere_network_ipv4_addresses != "" ? 5 : 180
  sync_time_with_host        = var.sync_time_with_host

  # Prevent attributes from going null in 0.12
  custom_attributes = {}
  extra_config      = {}
  tags              = []

  network_interface {
    network_id     = data.vsphere_network.network.id
    adapter_type   = data.vsphere_virtual_machine.template.network_interface_types[0]
    mac_address    = var.static_macs != "" ? element(split(",", var.static_macs), count.index) : ""
    use_static_mac = var.static_macs == "" ? false : true
  }

  // This doesn't actually work and is a workaround for the customize spec.
  cdrom {
    datastore_id = data.vsphere_datastore.datastore.id
    path         = "ISOs/os-livecd.iso"
  }

  disk {
    path             = "${lower(local.host_basename)}${count.index + 1}.${lower(var.region)}.vmdk"
    label            = "disk0"
    size             = var.root_disk_size
    eagerly_scrub    = data.vsphere_virtual_machine.template.disks[0].eagerly_scrub
    thin_provisioned = data.vsphere_virtual_machine.template.disks[0].thin_provisioned
  }

  dynamic "disk" {
    for_each = flatten(length(vsphere_virtual_disk.data_disk) > 0 ? [element(vsphere_virtual_disk.data_disk, count.index)] : [])

    content {
      path         = disk.value.vmdk_path
      label        = "disk1"
      attach       = true
      unit_number  = 1
      datastore_id = data.vsphere_datastore.datastore.id
    }
  }

  clone {
    template_uuid = data.vsphere_virtual_machine.template.id

    customize {
      dns_suffix_list = concat(["${lower(var.region)}.${lower(var.service_provider)}.example.com"], split(",", var.vsphere_network_domain_search))
      dns_server_list = split(
        ",",
        var.vsphere_network_ipv4_addresses != "" ? var.vsphere_network_domain_resolvers : "",
      )

      linux_options {
        host_name = "${lower(local.host_basename)}${count.index + 1}"
        domain    = "${lower(var.region)}.${lower(var.service_provider)}.example.com"
        time_zone = var.vsphere_cluster_timezone
      }

      network_interface {
        ipv4_address = element(split(",", var.vsphere_network_ipv4_addresses), count.index)
        ipv4_netmask = var.vsphere_network_ipv4_prefix_length
      }

      ipv4_gateway = var.vsphere_network_ipv4_gateway
    }
  }

  lifecycle {
    ignore_changes = [
      disk,
      clone,
      poweron_timeout,
      ide_controller_count,
      sata_controller_count,
    ]
  }

  provisioner "local-exec" {
    when = destroy

    interpreter = ["bash", "-c"]

    command = <<EOT
...
EOT

  }
}

resource "vsphere_virtual_disk" "data_disk" {
  count      = var.data_disk_size > 0 ? var.instance_count : 0
  size       = var.data_disk_size
  vmdk_path  = "${lower(local.host_basename)}${count.index + 1}.${lower(var.region)}.data_disk.vmdk"
  datacenter = var.vsphere_datacenter
  datastore  = var.vsphere_datastore
  type       = "thin"

  lifecycle {
    prevent_destroy = false
  }
}

# Create Virtual Machine Anti-Affinity Rules
resource "vsphere_compute_cluster_vm_anti_affinity_rule" "cluster_vm_anti_affinity_rule" {
  count               = var.instance_count > 0 ? 1 : 0
  name                = "${lower(local.host_basename)}.${lower(var.region)}.${lower(var.service_provider)}"
  compute_cluster_id  = data.vsphere_compute_cluster.cluster.id
  virtual_machine_ids = vsphere_virtual_machine.instance.*.id
}

# Fun hack explained here https://github.com/hashicorp/terraform/issues/16580#issuecomment-342573652
output "instance_ids" {
  value = vsphere_virtual_machine.instance.*.uuid
}

output "instances_dns" {
  value = formatlist(
    "%s.%s",
    concat(vsphere_virtual_machine.instance.*.name, [""]),
    "int.example.com",
  )
}

output "instance_private_ips" {
  value      = vsphere_virtual_machine.instance.*.default_ip_address
  depends_on = [vsphere_virtual_machine.instance]
}

Debug Output

Panic Output

N/A

Expected Behavior

Applies successfully, as it did with provider version 1.21.0.

Actual Behavior

Fails with the error "failed to query disk type".

Steps to Reproduce

terraform apply

References

The bug was introduced by https://github.com/hashicorp/terraform-provider-vsphere/commit/4fd6f8e89ec3cf9320554503689a60e464fb8bd4

pryorda commented 4 years ago

@skevir @bill-rich I added some logging locally and I'm getting a LocalizedMethodFault. What was the purpose of adding the disk type query to the disk read?

pryorda commented 4 years ago

Looks like the issue is specific to vSphere 6.5. Can anyone else try to reproduce it?

bill-rich commented 4 years ago

Hi @pryorda! Can you please include the debug log containing the error you're running into?

pryorda commented 4 years ago

I can't at this time. I can only provide the line it's failing on and the error from the logging statement I added. It appears vSphere 6.5 does not return that attribute.

pryorda commented 4 years ago

Would a pull request get approved if it made the provider warn when the attribute doesn't exist, rather than failing?

bill-rich commented 3 years ago

@pryorda Yes. That would be great. It was added as part of support for importing disk resources, but it isn't necessary outside of that case.

anupugalavat commented 3 years ago

@bill-rich Could you please let us know when a fix will be provided for this?

prologic commented 2 years ago

@bill-rich @pryorda @anupugalavat Are you able to review my colleague's fix for this in #1447? Our workaround at the moment is to pin the vsphere provider to 1.21.0.
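
For anyone needing the same workaround, a minimal pin looks like this (a sketch using the Terraform 0.13+ required_providers syntax; on 0.12, the version argument goes in the provider "vsphere" block instead):

terraform {
  required_providers {
    vsphere = {
      source  = "hashicorp/vsphere"
      version = "1.21.0" # last version before the disk type query was added
    }
  }
}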

tenthirtyam commented 2 years ago

@appilon, the change in #1447 to log a warning rather than error and fail seems pretty innocuous to me. What are your thoughts?

Ryan

appilon commented 2 years ago

I took a quick look at the linked PR, @tenthirtyam. I'm not sure it's appropriate to exit early if no disk type can be read. Why does this attribute fail to read? Do disks not always have a disk type? I worry that merging that PR will cause state drift.

tenthirtyam commented 2 years ago

Good point, @appilon. I'll add this to my list to review.

Ryan

tenthirtyam commented 2 years ago

Do you have a simplified Terraform example that reproduces this issue? I could use it for testing and reproduction purposes.

Has this issue been observed on vSphere 7.0?

Ryan Johnson Senior Staff Solutions Architect | Product Engineering @ VMware, Inc.

prologic commented 2 years ago

@tenthirtyam Sincere apologies, I haven't had the chance to go back to my team and get a repro for this. Will do as soon as I can! 👌

deebsman commented 2 years ago

@tenthirtyam I tried to cut this down to a simple single file, as we use multiple modules.

It's an issue on 6.5, 6.7, and 7.0.

The only solution for us is to pin the provider version to 1.21.0.

terraform {
  required_version = ">= 0.13"
  required_providers {
    vsphere = {
      source  = "hashicorp/vsphere"
      #version = "1.21.0" # last working version
    }
  }
}

provider "vsphere" {
  user           = var.vsphere_user
  password       = var.vsphere_password
  vsphere_server = var.vsphere_server
}

variable "environment" {
  default = "test"
}
variable "swarm_count" {
  default = "1"
}
variable "disk_size" {
  default = "1"
}
variable "additional_disks" {
  default = "3"
}
variable "additional_disks_size" {
  default = "1"
}
variable "vsphere_server" {
  default = ""
}
variable "vsphere_datacenter" {
  default = ""
}
variable "vsphere_datastore" {
  default = ""
}
variable "vsphere_resource_pool" {
  default = ""
}
variable "vsphere_network" {
  default = ""
}
variable "vsphere_cluster" {
  default = ""
}
variable "vsphere_template" {
  default = ""
}
variable "vsphere_folder_path" {
  default = ""
}

locals {
  node_name = "1191repro"
  disk-list = flatten([
    for disk in range(1, var.additional_disks + 1, 1) : [
      for node in range(1, var.swarm_count + 1, 1) : {
        node = node
        disk = disk
      }
    ]
  ])
}

data "vsphere_datacenter" "dc" {
  name = var.vsphere_datacenter
}
data "vsphere_datastore" "datastore" {
  name          = var.vsphere_datastore
  datacenter_id = data.vsphere_datacenter.dc.id
}
data "vsphere_resource_pool" "pool" {
  name          = var.vsphere_resource_pool
  datacenter_id = data.vsphere_datacenter.dc.id
}
data "vsphere_network" "network" {
  name          = var.vsphere_network
  datacenter_id = data.vsphere_datacenter.dc.id
}
data "vsphere_compute_cluster" "cluster" {
  name          = var.vsphere_cluster
  datacenter_id = data.vsphere_datacenter.dc.id
}
data "vsphere_virtual_machine" "template" {
  name          = "${var.vsphere_template}-${var.environment}"
  datacenter_id = data.vsphere_datacenter.dc.id
}

resource "vsphere_virtual_machine" "vm" {
  count            = var.swarm_count
  depends_on       = [time_sleep.disk_wait, vsphere_virtual_disk.additional_disk]
  name             = "1191repro"
  resource_pool_id = data.vsphere_resource_pool.pool.id
  datastore_id     = data.vsphere_datastore.datastore.id
  folder           = var.vsphere_folder_path

  num_cpus                   = 1
  memory                     = 1024
  guest_id                   = data.vsphere_virtual_machine.template.guest_id
  firmware                   = data.vsphere_virtual_machine.template.firmware
  scsi_type                  = data.vsphere_virtual_machine.template.scsi_type
  sync_time_with_host        = true
  wait_for_guest_ip_timeout  = 300
  shutdown_wait_timeout      = 1
  force_power_off            = true

  disk {
    label            = "bootdisk"
    size             = data.vsphere_virtual_machine.template.disks.0.size > var.disk_size ? data.vsphere_virtual_machine.template.disks.0.size : var.disk_size
    eagerly_scrub    = data.vsphere_virtual_machine.template.disks.0.eagerly_scrub
    thin_provisioned = data.vsphere_virtual_machine.template.disks.0.thin_provisioned
  }

  dynamic "disk" {
    for_each = range(var.additional_disks)

    content {
      path         = "test/${var.environment}/${local.node_name}${count.index + 1}/disk${local.disk-list[(disk.value * var.swarm_count)].disk}.vmdk"
      label        = "${local.node_name}${count.index + 1}-disk${local.disk-list[(disk.value * var.swarm_count)].disk}"
      attach       = true
      unit_number  = local.disk-list[(disk.value * var.swarm_count)].disk
      datastore_id = data.vsphere_datastore.datastore.id
    }
  }

  network_interface {
    network_id = data.vsphere_network.network.id
  }

  clone {
    template_uuid = data.vsphere_virtual_machine.template.id
  }
}

resource "time_sleep" "disk_wait" {
  depends_on       = [vsphere_virtual_disk.additional_disk]
  destroy_duration = "20s"
}

resource "vsphere_virtual_disk" "additional_disk" {
  count              = var.additional_disks
  size               = var.additional_disks_size
  vmdk_path          = "test/${var.environment}/${local.node_name}${local.disk-list[count.index].node}/disk${local.disk-list[count.index].disk}.vmdk"
  datacenter         = var.vsphere_datacenter
  datastore          = var.vsphere_datastore
  type               = "thin"
  create_directories = true
}

tenthirtyam commented 2 years ago

Thanks for sharing, @deebsman.

@appilon and I can use this example for further investigation when time permits.

Ryan Johnson Senior Staff Solutions Architect | Product Engineering @ VMware, Inc.

dimitarproynov commented 1 year ago

I'll try to reproduce with the simplified config and debug the issue.

tenthirtyam commented 1 year ago

Thanks, Dimitar! I've reassigned the issue to you while you investigate.

dimitarproynov commented 1 year ago

Hi all,

I've stepped through the code mentioned in 4fd6f8e (vdm.QueryVirtualDiskInfo) with a debugger. There is one call for each additional disk in the above .tf snippet. In all cases, vdm.QueryVirtualDiskInfo returned without errors on both 6.7 and 7.0 (latest patch versions).

That being said, the creation of the "vm" virtual machine from the above snippet gets stuck in a loop. The vSphere UI shows that the VM has been created and the vim task has completed, but the govmomi code is stuck at virtualmachine.WaitForGuestIP. I assume the issue is in my testbed.

Does this bug reproduce after the VM (from the short snippet above) is cloned or before that?

Regards, _Proynov

dimitarproynov commented 1 year ago

I've circumvented the WaitForGuestIP and WaitForGuestNet calls; alas, "terraform apply" succeeds on both 6.7 and 7.0 latest patch.

tenthirtyam commented 1 year ago

I've added the not-reproduced label based on Dimitar's testing with the sample provided earlier in the issue.

dimitarproynov commented 1 year ago

@deebsman There has to be something more to it, perhaps some particularity in your environment that contributes to reproducing the issue.

My testing environment was the following:

VC (6.7 and 7.0.3 latest)

Your sample clones a VM from a template and adds additional disks to it. Maybe there is something special about the VM template you used that reproduces the issue.

Regards, _Proynov

dimitarproynov commented 1 year ago

Are there any error statements in the vCenter logs?

You can find them by SSHing in and checking the /var/log/vmware/vpxd/vpxd-X.log file.

mostuff commented 1 year ago

@dimitarproynov This appears to be an issue when vsphere_virtual_disk.additional_disk.datastore is a path rather than a name.

resourceVSphereVirtualDiskCreate() quite happily accepts a datastore path because it calls getDatastore(), which splits the path and searches by parent/child to find the datastore, so the disks are created successfully.

resourceVSphereVirtualDiskRead(), however, passes the datastore exactly as specified to QueryDiskType(), which doesn't differentiate between path and name and ends up erroring with Invalid datastore path '[ds cluster/ds name] path/to/file.vmdk'.

It works when datastore is a name.

I'm not sure if there's a bug here, as the documentation for vsphere_virtual_disk does say that datastore should be set to the name of the datastore. It is a bit inconsistent, though, as the vsphere_datastore data source accepts either a path or a name.
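
To illustrate the difference, a minimal sketch based on the analysis above (the datacenter and datastore names are hypothetical):

# Works: the datastore is referenced by name only.
resource "vsphere_virtual_disk" "by_name" {
  size       = 1
  vmdk_path  = "disks/by_name.vmdk"
  datacenter = "dc-01"
  datastore  = "datastore-01"
  type       = "thin"
}

# Creates successfully, but the follow-up read fails with "failed to query
# disk type" because QueryDiskType() receives the cluster/name path verbatim.
resource "vsphere_virtual_disk" "by_path" {
  size       = 1
  vmdk_path  = "disks/by_path.vmdk"
  datacenter = "dc-01"
  datastore  = "datastore-cluster-01/datastore-01"
  type       = "thin"
}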

mike-sol commented 5 months ago

This issue has begun to happen to me on 2.7.0 against vCenter 7.0.3. It does not happen under 2.6.1. I am definitely passing the datastore by name and the path separately; see below:

# Optional secondary disk
resource "vsphere_virtual_disk" "dual" {

  count = var.vmware_dual_disk.provision ? 1 : 0

  datastore          = var.vmware_dual_disk.datastore_name
  vmdk_path          = "/dual_disks/${var.host.hostname}-dual.vmdk"
  create_directories = true
  type               = "thin"
  size               = var.vmware_dual_disk.size

  # This disk is precious!
  lifecycle {
    prevent_destroy = false
  }
}

Happy to help debug further if desired.