canonical / lxd

Powerful system container and virtual machine manager
https://canonical.com/lxd
GNU Affero General Public License v3.0
4.38k stars 931 forks source link

LXD 3.19: Cannot remove cluster member #6770

Closed Ne02ptzero closed 4 years ago

Ne02ptzero commented 4 years ago

On LXD 3.19:

$> lxc cluster remove XXX
Error: Delete https://10.33.0.16:8443/internal/cluster/accept: http: server gave HTTP response to HTTPS client

With --debug

$> lxc cluster remove XXX --debug
DBUG[01-24|13:39:26] Connecting to a local LXD over a Unix socket 
DBUG[01-24|13:39:26] Sending request to LXD                   method=GET url=http://unix.socket/1.0 etag=
DBUG[01-24|13:39:26] Got response struct from LXD 
DBUG[01-24|13:39:26] 
    {
        "config": {
            "cluster.https_address": "10.32.0.22:8443",
            "core.https_address": "10.32.0.22:8443",
            "core.trust_password": true
        },
        "api_extensions": [
            "storage_zfs_remove_snapshots",
            "container_host_shutdown_timeout",
            "container_stop_priority",
            "container_syscall_filtering",
            "auth_pki",
            "container_last_used_at",
            "etag",
            "patch",
            "usb_devices",
            "https_allowed_credentials",
            "image_compression_algorithm",
            "directory_manipulation",
            "container_cpu_time",
            "storage_zfs_use_refquota",
            "storage_lvm_mount_options",
            "network",
            "profile_usedby",
            "container_push",
            "container_exec_recording",
            "certificate_update",
            "container_exec_signal_handling",
            "gpu_devices",
            "container_image_properties",
            "migration_progress",
            "id_map",
            "network_firewall_filtering",
            "network_routes",
            "storage",
            "file_delete",
            "file_append",
            "network_dhcp_expiry",
            "storage_lvm_vg_rename",
            "storage_lvm_thinpool_rename",
            "network_vlan",
            "image_create_aliases",
            "container_stateless_copy",
            "container_only_migration",
            "storage_zfs_clone_copy",
            "unix_device_rename",
            "storage_lvm_use_thinpool",
            "storage_rsync_bwlimit",
            "network_vxlan_interface",
            "storage_btrfs_mount_options",
            "entity_description",
            "image_force_refresh",
            "storage_lvm_lv_resizing",
            "id_map_base",
            "file_symlinks",
            "container_push_target",
            "network_vlan_physical",
            "storage_images_delete",
            "container_edit_metadata",
            "container_snapshot_stateful_migration",
            "storage_driver_ceph",
            "storage_ceph_user_name",
            "resource_limits",
            "storage_volatile_initial_source",
            "storage_ceph_force_osd_reuse",
            "storage_block_filesystem_btrfs",
            "resources",
            "kernel_limits",
            "storage_api_volume_rename",
            "macaroon_authentication",
            "network_sriov",
            "console",
            "restrict_devlxd",
            "migration_pre_copy",
            "infiniband",
            "maas_network",
            "devlxd_events",
            "proxy",
            "network_dhcp_gateway",
            "file_get_symlink",
            "network_leases",
            "unix_device_hotplug",
            "storage_api_local_volume_handling",
            "operation_description",
            "clustering",
            "event_lifecycle",
            "storage_api_remote_volume_handling",
            "nvidia_runtime",
            "container_mount_propagation",
            "container_backup",
            "devlxd_images",
            "container_local_cross_pool_handling",
            "proxy_unix",
            "proxy_udp",
            "clustering_join",
            "proxy_tcp_udp_multi_port_handling",
            "network_state",
            "proxy_unix_dac_properties",
            "container_protection_delete",
            "unix_priv_drop",
            "pprof_http",
            "proxy_haproxy_protocol",
            "network_hwaddr",
            "proxy_nat",
            "network_nat_order",
            "container_full",
            "candid_authentication",
            "backup_compression",
            "candid_config",
            "nvidia_runtime_config",
            "storage_api_volume_snapshots",
            "storage_unmapped",
            "projects",
            "candid_config_key",
            "network_vxlan_ttl",
            "container_incremental_copy",
            "usb_optional_vendorid",
            "snapshot_scheduling",
            "container_copy_project",
            "clustering_server_address",
            "clustering_image_replication",
            "container_protection_shift",
            "snapshot_expiry",
            "container_backup_override_pool",
            "snapshot_expiry_creation",
            "network_leases_location",
            "resources_cpu_socket",
            "resources_gpu",
            "resources_numa",
            "kernel_features",
            "id_map_current",
            "event_location",
            "storage_api_remote_volume_snapshots",
            "network_nat_address",
            "container_nic_routes",
            "rbac",
            "cluster_internal_copy",
            "seccomp_notify",
            "lxc_features",
            "container_nic_ipvlan",
            "network_vlan_sriov",
            "storage_cephfs",
            "container_nic_ipfilter",
            "resources_v2",
            "container_exec_user_group_cwd",
            "container_syscall_intercept",
            "container_disk_shift",
            "storage_shifted",
            "resources_infiniband",
            "daemon_storage",
            "instances",
            "image_types",
            "resources_disk_sata",
            "clustering_roles",
            "images_expiry",
            "resources_network_firmware",
            "backup_compression_algorithm",
            "ceph_data_pool_name",
            "container_syscall_intercept_mount",
            "compression_squashfs",
            "container_raw_mount",
            "container_nic_routed",
            "container_syscall_intercept_mount_fuse",
            "container_disk_ceph",
            "virtual-machines",
            "image_profiles",
            "clustering_architecture",
            "resources_disk_id",
            "storage_lvm_stripes",
            "vm_boot_priority"
        ],
        "api_status": "stable",
        "api_version": "1.0",
        "auth": "trusted",
        "public": false,
        "auth_methods": [
            "tls"
        ],
        "environment": {
            "addresses": [
                "10.32.0.22:8443"
            ],
            "architectures": [
                "armv7l"
            ],
            "certificate": "XXX",
            "certificate_fingerprint": "XXX",
            "driver": "lxc",
            "driver_version": "3.0.3",
            "kernel": "Linux",
            "kernel_architecture": "armv7l",
            "kernel_features": {
                "netnsid_getifaddrs": "false",
                "seccomp_listener": "false",
                "seccomp_listener_continue": "false",
                "shiftfs": "false",
                "uevent_injection": "true",
                "unpriv_fscaps": "true"
            },
            "kernel_version": "4.19.97-v7+",
            "lxc_features": {
                "cgroup2": "false",
                "mount_injection_file": "false",
                "network_gateway_device_route": "false",
                "network_ipvlan": "false",
                "network_l2proxy": "false",
                "network_phys_macvlan_mtu": "false",
                "network_veth_router": "false",
                "seccomp_notify": "false"
            },
            "project": "default",
            "server": "lxd",
            "server_clustered": true,
            "server_name": "XXX",
            "server_pid": 1264,
            "server_version": "3.19",
            "storage": "ceph",
            "storage_version": "ceph version 12.2.11 (26dc3775efc7bb286a1d6d66faee0ba30ea23eee) luminous (stable)"
        }
    } 
DBUG[01-24|13:39:26] Sending request to LXD                   method=DELETE url=http://unix.socket/1.0/cluster/members/XXX etag=
Error: Delete https://10.33.0.16:8443/internal/cluster/accept: http: server gave HTTP response to HTTPS client
freeekanayaka commented 4 years ago

Do you have a node named XXX or is it a non-existing node?

Ne02ptzero commented 4 years ago

XXX is just me replacing the actual node name. But yes, the node does exist!

freeekanayaka commented 4 years ago

I can't reproduce this. Please could you provide more details about your situation? Debug log of the server which is handling the request would be useful. Or even better, a step-by-step reproducer.

Ne02ptzero commented 4 years ago

I can't provide a step-by-step reproducer: this happens every time, and I don't know what triggers the bug.

Client log

Error: Delete https://10.33.0.16:8443/internal/cluster/accept: http: server gave HTTP response to HTTPS client

Local Server log:

DBUG[01-24|15:53:56] Handling                                 user= method=GET url=/1.0 ip=@
DBUG[01-24|15:53:56] Handling                                 method=DELETE url=/1.0/cluster/members/XXX ip=@ user=
DBUG[01-24|15:53:56] Redirect member delete request to 10.33.0.16:8443 

I can't see any log related to the request on 10.33.0.16

I can, however, provide an overview of my cluster:

All the nodes are running 3.19 (master). I actually don't know if this bug was introduced in 3.19, since it's the first time I had to remove a cluster member.

freeekanayaka commented 4 years ago

Could you please try submitting the request directly to 10.33.0.16 and see if it shows more insightful logs? It should suffice to point your lxc client at it.

freeekanayaka commented 4 years ago

I was able to reproduce this. I will fix it in the next days.