oracle / terraform-provider-oci

Terraform Oracle Cloud Infrastructure provider
https://www.terraform.io/docs/providers/oci/
Mozilla Public License 2.0
760 stars 684 forks source link

User_data fails to start a service when executed through a Powershell script #1911

Open pathaknikhil87 opened 1 year ago

pathaknikhil87 commented 1 year ago

Community Note

Terraform Version and Provider Version

Terraform version - v1.0.11

Provider version - v5.1.0

Affected Resource(s)

Windows 2022 standard, WIndows 2019 standard Instance fail to start / connect through SSH. The reason is that the PowerShell script executed through user_data fails to start the sshd service.
Below is the PowerShell script.

Terraform Configuration Files

variable "instances" {
  default= {
    prod = {
      server1  = { instance_shape ="VM.Standard.E4.Flex", hostname="ibm", needs_public_ip= false, subnet1 ="subnet_app", subnet2 ="subnet_web", memory="8", cpu= "1", sshkey = "key1", image = "Oracle-Linux-8.7-2023.05.24-0", script = "./scripts/oracle-linux-generic-postinstall.sh", tags = [ "os:windows", "version:2019", "edition:standard", "sp:null", "arch:x86_64", "appgrp:pes", "app:jump", "site:primary","dsg:ibm_jump","sap_sid:ibm" ]  } #pnic_subnet = "lz_vpc_ibm_subnet", pnic_name = "primary", snic_subnet   = "lz_vpc_customer_subnet", snic_name = "secondary", sgs = ["lz_dsg", "ibm_jump", "ibm_ad_client_lz", "cis_jump" ], script = "./script/win-presetup-v3.ps1", sshkey = "ansible"
      server2  = { instance_shape ="VM.Standard.E4.Flex", hostname="ibm2", needs_public_ip= false, subnet1 ="subnet_app", subnet2 ="subnet_web", memory="8", cpu= "1", sshkey = "key1", image = "Oracle-Linux-8.7-2023.05.24-0", script = "./scripts/oracle-linux-generic-postinstall.sh", tags = [ "os:windows", "version:2019", "edition:standard", "sp:null", "arch:x86_64", "appgrp:pes", "app:jump", "site:primary","dsg:ibm_jump","sap_sid:ibm2" ]  } #pnic_subnet = "lz_vpc_ibm_subnet", pnic_name = "primary", snic_subnet   = "lz_vpc_customer_subnet", snic_name = "secondary", sgs = ["lz_dsg", "ibm_jump", "ibm_ad_client_lz", "cis_jump" ], script = "./script/win-presetup-v3.ps1", sshkey = "ansible"
      server3  = { instance_shape ="VM.Standard.E4.Flex", hostname="ibm3-win", needs_public_ip= false, subnet1 ="subnet_app", subnet2 ="subnet_web", memory="8", cpu= "1", sshkey = "key1", image = "Windows-Server-2019-Standard-Edition-VM-2023.05.24-0", script = "./scripts/win-presetup-v3.ps1", tags = [ "os:windows", "version:2019", "edition:standard", "sp:null", "arch:x86_64", "appgrp:pes", "app:jump", "site:primary","dsg:ibm_jump","sap_sid:ibm2" ]  } #pnic_subnet = "lz_vpc_ibm_subnet", pnic_name = "primary", snic_subnet   = "lz_vpc_customer_subnet", snic_name = "secondary", sgs = ["lz_dsg", "ibm_jump", "ibm_ad_client_lz", "cis_jump" ], script = "./script/win-presetup-v3.ps1", sshkey = "ansible"
      server4  = { instance_shape ="VM.Standard.E4.Flex", hostname="ibm4", needs_public_ip= false, subnet1 ="subnet_app", subnet2 ="cust_subnet_app", memory="8", cpu= "1", sshkey = "key1", image = "Oracle-Linux-8.7-2023.05.24-0", script = "./scripts/oracle-linux-generic-postinstall.sh", tags = [ "os:oracle", "version:2019", "edition:standard", "sp:null", "arch:x86_64", "appgrp:pes", "app:jump", "site:primary","dsg:ibm_jump","sap_sid:ibm4" ]  } #pnic_subnet = "lz_vpc_ibm_subnet", pnic_name = "primary", snic_subnet   = "lz_vpc_customer_subnet", snic_name = "secondary", sgs = ["lz_dsg", "ibm_jump", "ibm_ad_client_lz", "cis_jump" ], script = "./script/win-presetup-v3.ps1", sshkey = "ansible"
      server5  = { instance_shape ="VM.Standard.E4.Flex", hostname="ibm5", needs_public_ip= false, subnet1 ="subnet_web", subnet2 ="cust_subnet_web", memory="8", cpu= "1", sshkey = "key1", image = "Oracle-Linux-8.7-2023.05.24-0", script = "./scripts/oracle-linux-generic-postinstall.sh", tags = [ "os:oracle", "version:2019", "edition:standard", "sp:null", "arch:x86_64", "appgrp:pes", "app:jump", "site:primary","dsg:ibm_jump","sap_sid:ibm5" ]  } #pnic_subnet = "lz_vpc_ibm_subnet", pnic_name = "primary", snic_subnet   = "lz_vpc_customer_subnet", snic_name = "secondary", sgs = ["lz_dsg", "ibm_jump", "ibm_ad_client_lz", "cis_jump" ], script = "./script/win-presetup-v3.ps1", sshkey = "ansible"
      server6  = { instance_shape ="VM.Standard.E4.Flex", hostname="ibm6-win", needs_public_ip= false, subnet1 ="subnet_db", subnet2 ="cust_subnet_db", memory="8", cpu= "1", sshkey = "key1", image = "Windows-Server-2019-Standard-Edition-VM-2023.05.24-0", script = "./scripts/win-presetup-v3.ps1", tags = [ "os:windows", "version:2019", "edition:standard", "sp:null", "arch:x86_64", "appgrp:pes", "app:jump", "site:primary","dsg:ibm_jump","sap_sid:ibm6" ]  } #pnic_subnet = "lz_vpc_ibm_subnet", pnic_name = "primary", snic_subnet   = "lz_vpc_customer_subnet", snic_name = "secondary", sgs = ["lz_dsg", "ibm_jump", "ibm_ad_client_lz", "cis_jump" ], script = "./script/win-presetup-v3.ps1", sshkey = "ansible"
    }
  }
}

data "oci_core_images" "windows_images" {
  compartment_id           = var.tenancy_ocid

   operating_system         = "Windows"
   operating_system_version = "Server 2019 Standard"
   shape                    = "VM.Standard.E4.Flex"
   sort_by                  = "TIMECREATED"
   sort_order               = "DESC"
}

data "oci_core_images" "ol_images" {
  compartment_id           = var.tenancy_ocid

#   operating_system         = "Oracle Linux"
#   operating_system_version = "8"
#   shape                    = "VM.Standard.E4.Flex"
#   sort_by                  = "TIMECREATED"
#   sort_order               = "DESC"
}

data "oci_identity_availability_domains" "ad" {
  compartment_id = var.tenancy_ocid
}

resource "oci_core_instance" "instance" {
  for_each                  = var.instances["prod"]
  compartment_id                      = local.workload_compartment_ocid
  availability_domain                 = lookup(local.availability_domains[0], "name")
  shape                               = each.value.instance_shape
  is_pv_encryption_in_transit_enabled = true
  display_name                        = each.value.hostname

freeform_tags = {for v in each.value.tags: 
    split(":", v)[0] => split(":", v)[1]}

  create_vnic_details {
    assign_public_ip = each.value.needs_public_ip
    display_name     = "${each.value.hostname}-vnic"
    hostname_label   = each.value.hostname
    subnet_id        = local.priv_subnets[each.value.subnet1]
  }

  shape_config {
    memory_in_gbs             = each.value.memory
    ocpus                     = each.value.cpu
    baseline_ocpu_utilization = "BASELINE_1_1"
  }

  metadata = {
    ssh_authorized_keys = local.keys[each.value.sshkey]
    user_data = "${lookup(each.value, "script", "*")}" != "None" ? base64encode(file("${lookup(each.value, "script", "*")}")) : ""
  }

  source_details {
    source_type = "image"
    source_id   =  local.images[each.value.image].id
  }

  launch_options {
    is_pv_encryption_in_transit_enabled = true
    network_type                        = "PARAVIRTUALIZED"
  }

  timeouts {
    create = "10m"
  }
}

resource "oci_core_vnic_attachment" "test_vnic_attachment" {
   for_each                  = local.workload_second_interface
  create_vnic_details {
    assign_public_ip = each.value.needs_public_ip
    display_name     = "${each.value.hostname}-vnic2"
    hostname_label   = each.value.hostname
    subnet_id        = local.priv_subnets[each.value.subnet2]
  }

    instance_id = oci_core_instance.instance[each.key].id

    # #Optional
    # display_name = var.vnic_attachment_display_name
    # nic_index = var.vnic_attachment_nic_index
}
locals {
        availability_domains = data.oci_identity_availability_domains.ad.availability_domains
        workload_compartment_ocid = module.prod_environment.workload_compartment_id
        network_compartment_ocid = module.prod_environment.compartment.network.id
        #network_compartment_name = "Test_net_compartment"
        #workload_compartment_name = "Test_wrk_compartment"
  priv_subnets = {
        subnet_web          = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-SPK-${local.region_key[0]}001"]   #module.prod_environment.module.network.module.spoke.
        subnet_app          = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-SPK-${local.region_key[0]}002"]
        subnet_db           = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-SPK-${local.region_key[0]}003"]
        hub_public_subnet = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-HUB-${local.region_key[0]}001"]
        hub_private_subnet = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-HUB-${local.region_key[0]}002"]

        cust_subnet_web          = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-CUST-SPK-${local.region_key[0]}001"]   #module.prod_environment.module.network.module.spoke.
        cust_subnet_app          = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-CUST-SPK-${local.region_key[0]}002"]
        cust_subnet_db           = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-CUST-SPK-${local.region_key[0]}003"]

        cust_hub_private_subnet = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-CUST-HUB-${local.region_key[0]}002"]
    }
    keys = {
    key1 = file("./keys/ansible.pub")
    }
hub_subnets = {
    hub_public_subnet = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-HUB-${local.region_key[0]}001"]
    hub_private_subnet = module.prod_environment.subnets["OCI-ELZ-SUB-${local.prod_environment.environment_prefix}-HUB-${local.region_key[0]}002"]
}
    images =  { for k in data.oci_core_images.ol_images.images : k.display_name => {
        id = k.id
        pv_enabled = k.launch_options[0].is_pv_encryption_in_transit_enabled
        network_type = k.launch_options[0].network_type
    } }
    #images =  data.oci_core_images.ol_images.images
    workload_second_interface = { for k , v in var.instances["prod"] : k => v if v.subnet2 != "None"}
    bastion_second_interface = { for k , v in var.bastion["prod"] : k => v if v.subnet2 != "None"}
}

Script which we used for user_data for windows servers

function Get-RandomPassword {
    param (
        [Parameter(Mandatory)]
        [int] $length,
        [int] $amountOfNonAlphanumeric = 1
    )
    Add-Type -AssemblyName 'System.Web'
    return [System.Web.Security.Membership]::GeneratePassword($length, $amountOfNonAlphanumeric)
}
$userName = "ansibleuser"
$userhome = "C:\Users\"+ $userName
$sshpath = "C:\ProgramData\ssh\sshd_config"
$sshdirectory = $userhome + "\.ssh"
$authkeys = $sshdirectory + "\authorized_keys"
$asuser = whoami
$pass   = Get-RandomPassword 16

start-sleep -s 60

if (! $args[0]){
    $pkey = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQAXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX ssh-key-2023-06-07"
}else {
    $pkey = $args[0]
}

function Configure-SSHKey{
    # This need to be added and called after SSh is started and key is created, There needs to be clarity on steps to be performed.
    (Get-Content $sshpath).replace('#SyslogFacility AUTH', 'SyslogFacility LOCAL0') | Set-Content $sshpath
    (Get-Content $sshpath).replace('#LogLevel INFO', 'LogLevel INFO') | Set-Content $sshpath
    (Get-Content $sshpath).replace('#PasswordAuthentication yes', 'PasswordAuthentication no') | Set-Content $sshpath
    (Get-Content $sshpath).replace('Match Group administrators', '#Match Group administrators') | Set-Content $sshpath
    (Get-Content $sshpath).replace('AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys', '#AuthorizedKeysFile __PROGRAMDATA__/ssh/administrators_authorized_keys') | Set-Content $sshpath
    (Get-Content $sshpath).replace('Match Group administrators', '#Match Group administrators') | Set-Content $sshpath
    Add-Content $sshpath "AllowGroups sshusers"
    New-Item $authkeys -ItemType File -Value $pkey
    #Add-Content $authkeys $pkey
    icacls.exe $authkeys /inheritance:d /grant:r ansibleuser:F /T
    icacls.exe $sshdirectory /inheritance:d /grant:r ansibleuser:F /T
    icacls.exe $sshdirectory /inheritance:d
    icacls.exe $sshdirectory /remove $asuser
    icacls.exe $sshdirectory /remove everyone
    icacls.exe $sshdirectory /remove user
    icacls.exe $authkeys /inheritance:d
    icacls.exe $authkeys /remove $asuser
    icacls.exe $authkeys /remove everyone
    icacls.exe $authkeys /remove user
    icacls.exe $sshdirectory
    icacls.exe $authkeys
}

function Add-User{

            New-LocalGroup -Name "sshusers" >> c:\debug.txt
            echo "Setting up Ansible user and adding to Administrator group."
            $secure = ConvertTo-SecureString -String $pass -AsPlainText -Force
            New-LocalUser -Name $userName -Description "Ansible autoamtion user." -Password $secure
            Invoke-WebRequest https://download.sysinternals.com/files/PSTools.zip -OutFile pstool.zip
            Expand-Archive pstool.zip -DestinationPath pstool
            .\pstool\PsExec.exe  -accepteula  -nobanner -u $username -p $pass hostname
            Remove-Item pstool* -Force -Recurse
            mkdir -p $sshdirectory
            Add-LocalGroupMember -Group "Administrators" -Member $userName
            Add-LocalGroupMember -Group "Remote Desktop Users" -Member $userName
            Add-LocalGroupMember -Group "sshusers" -Member $userName
            $created = Get-LocalUser -Name $userName
            echo $userhome
            icacls.exe $userhome
            icacls.exe $sshdirectory
}

function Start-SSH($service){
    # Function to check SSH status and start if found to be stopped
    try {
        $Service = Get-Service -Name sshd
        if ($Service.Status -eq "Stopped"){
            $status = Start-Service sshd
            echo "The status is `n$status"
            $Service = Get-Service -Name sshd
            echo $Service >> c:\debug.txt
        }else {
            echo $Service.Status >> c:\debug.txt
        }
    }
    catch [Exception]{
        echo $_.Exception.GetType().FullName, $_.Exception.Message >> c:\debug.txt

    }
    return $Service.Status
}

function Install-OpenSSH{
    # Function to install Openssh client and Server if not installed.
    echo "Starting function Install-OpenSSH" >> c:\debug.txt
    echo "NExt line is getting the string" >> c:\debug.txt
    $string = Get-WindowsCapability -Online | ? Name -like 'OpenSSH*'
    echo "The value of string is `n$string" >> c:\debug.txt
    for ($i=0 ; $i -lt $string.name.length; $i++){
        $status = $string.State[$i]
        echo "Status is `n$status" >> c:\debug.txt
        if ($status -eq "NotPresent") {
            try {
                if ($string.name[$i] -like 'OpenSSH.Server*'){
                    Write-Output "installing "$string.name[$i] >> c:\debug.txt
                    Add-WindowsCapability -Online -Name $string.name[$i] >> c:\debug.txt
                    echo "Installation of $string.name[$i] completed" >> c:\debug.txt
                    New-NetFirewallRule -Name sshd -DisplayName 'OpenSSH Server (sshd)' -Enabled True -Direction Inbound -Protocol TCP -Action Allow -LocalPort 22 >> c:\debug.txt
                    Start-SSH($string.name[$i]) >> c:\debug.txt
                }
                else {
                    Write-Output "installing "$string.name[$i] >> c:\debug.txt
                    Add-WindowsCapability -Online -Name $string.name[$i] >> c:\debug.txt
                    echo "Installation of $string.name[$i] completed" >> c:\debug.txt
                    Get-WindowsCapability -Online | ? Name -like 'OpenSSH*' >> c:\debug.txt
                }
            }
            catch [Exception]{
                echo $_.Exception.GetType().FullName, $_.Exception.Message >> c:\debug.txt
            }
        }else {
            echo $string.name[$i] "Installed" >> c:\debug.txt
        }

    }
 }

 # Calling each function in sequence of execution
 Install-OpenSSH
 Add-User
 Configure-SSHKey
 Start-SSH

echo "Restarting ssh" >> c:\debug.txt
Stop-Service sshd
start-sleep -s 15
Start-Service sshd >> c:\debug.txt
Set-Service sshd -StartupType Automatic
echo "Getting the sshd service" >> c:\debug.txt
get-service sshd >> c:\debug.txt

Debug Output

Panic Output

Expected Behavior

The server should be having the sshd service running and we should be able to connect through ssh.

Actual Behavior

The sshd service is in stopped state and requires one to start it manually. Once the service is started, ssh is allowed to the windows instance.

Steps to Reproduce

  1. terraform apply

Important Factoids

References

tf-oci-pub commented 1 year ago

Thank you for reporting the issue. We observed the affected resources are not provided in the description or it's incorrect. We request you to add it in issue description as mentioned in below format. Example: affected_resources = oci_core_instance , oci_core_instances

If it's not related to any particular resource then mention affected resource as terraform. Example: affected_resources = terraform

As this works through automation, request you to follow exact syntax.

pathaknikhil87 commented 1 year ago

Yes, the affected resource is oci_core_instance.