canonical / lxd

Powerful system container and virtual machine manager
https://canonical.com/lxd
GNU Affero General Public License v3.0

`msg="Failed to get disk stats" err="unexpected EOF"` when collecting metrics for a certain container #10746

Closed · simondeziel closed this 2 years ago

simondeziel commented 2 years ago

Required information

root@xeon:~# snap list lxd
Name  Version        Rev    Tracking      Publisher   Notes
lxd   5.0.0-b0287c1  22923  5.0/stable/…  canonical✓  -


Issue description

I recently reinstalled my host (named `xeon`) with Ubuntu 22.04.1. After some time, it started throwing this error on every prometheus scrape (every 15s):

Aug 3 07:00:07 xeon lxd.daemon[1313]: time="2022-08-03T07:00:07Z" level=warning msg="Failed to get disk stats" err="unexpected EOF" instance=metrics instanceType=container project=default
Aug 3 07:00:22 xeon lxd.daemon[1313]: time="2022-08-03T07:00:22Z" level=warning msg="Failed to get disk stats" err="unexpected EOF" instance=metrics instanceType=container project=default
Aug 3 07:00:37 xeon lxd.daemon[1313]: time="2022-08-03T07:00:37Z" level=warning msg="Failed to get disk stats" err="unexpected EOF" instance=metrics instanceType=container project=default
...


A `snap restart lxd` made it go away until it came back the day after. The warning is always about the container named `metrics`.

The container's config:

root@xeon:~# lxc config show --expanded metrics
architecture: x86_64
config:
  image.architecture: amd64
  image.description: Ubuntu focal amd64 (20220113_07:42)
  image.os: Ubuntu
  image.release: focal
  image.serial: "20220113_07:42"
  image.type: squashfs
  image.variant: default
  limits.cpu.allowance: 100%
  limits.memory: 512MiB
  limits.processes: "500"
  security.devlxd: "false"
  security.idmap.isolated: "true"
  security.nesting: "true"
  security.privileged: "false"
  security.protection.delete: "true"
  security.syscalls.deny_compat: "true"
  snapshots.expiry: 3d
  snapshots.schedule: '@daily, @startup'
  volatile.base_image: cd37dfe79d6edd4ab36943f5ca4226d47280285772bb457b622bbcec92fe350f
  volatile.cloud-init.instance-id: 9220ad48-38c7-42de-93e0-a9fd21046d1c
  volatile.eth0.host_name: vethba646770
  volatile.eth0.hwaddr: 00:16:3e:bb:f5:f6
  volatile.eth0.name: eth0
  volatile.idmap.base: "1131072"
  volatile.idmap.current: '[{"Isuid":true,"Isgid":false,"Hostid":1131072,"Nsid":0,"Maprange":65536},{"Isuid":false,"Isgid":true,"Hostid":1131072,"Nsid":0,"Maprange":65536}]'
  volatile.idmap.next: '[{"Isuid":true,"Isgid":false,"Hostid":1131072,"Nsid":0,"Maprange":65536},{"Isuid":false,"Isgid":true,"Hostid":1131072,"Nsid":0,"Maprange":65536}]'
  volatile.last_state.idmap: '[{"Isuid":true,"Isgid":false,"Hostid":1131072,"Nsid":0,"Maprange":65536},{"Isuid":false,"Isgid":true,"Hostid":1131072,"Nsid":0,"Maprange":65536}]'
  volatile.last_state.power: RUNNING
  volatile.uuid: 7760acd1-e480-483e-949d-95b4d43cdd2d
devices:
  eth0:
    network: int
    type: nic
  prometheus:
    path: /var/snap/prometheus/common/
    pool: default
    source: prometheus
    type: disk
  root:
    path: /
    pool: default
    size: 4GiB
    type: disk
ephemeral: false
profiles:

That container is one of many on that server, and other containers also have volumes attached to them:

root@xeon:~# lxc ls
+---------+---------+---------------------+-------------------------------+-----------+-----------+
|  NAME   |  STATE  |        IPV4         |             IPV6              |   TYPE    | SNAPSHOTS |
+---------+---------+---------------------+-------------------------------+-----------+-----------+
| log     | RUNNING | 172.24.21.51 (eth0) | 2001:470:b1c3:7941::51 (eth0) | CONTAINER | 4         |
+---------+---------+---------------------+-------------------------------+-----------+-----------+
| metrics | RUNNING | 172.24.21.66 (eth0) | 2001:470:b1c3:7941::66 (eth0) | CONTAINER | 0         |
+---------+---------+---------------------+-------------------------------+-----------+-----------+
| puppet  | RUNNING | 172.24.21.40 (eth0) | 2001:470:b1c3:7941::40 (eth0) | CONTAINER | 4         |
+---------+---------+---------------------+-------------------------------+-----------+-----------+
| redmine | STOPPED |                     |                               | CONTAINER | 0         |
+---------+---------+---------------------+-------------------------------+-----------+-----------+
| smb     | RUNNING | 172.24.28.45 (eth0) | 2001:470:b1c3:7948::45 (eth0) | CONTAINER | 4         |
+---------+---------+---------------------+-------------------------------+-----------+-----------+
| squid   | RUNNING | 172.24.21.28 (eth0) | 2001:470:b1c3:7941::28 (eth0) | CONTAINER | 4         |
+---------+---------+---------------------+-------------------------------+-----------+-----------+

The only unusual thing about `metrics` is that it runs snapd and has some snaps installed inside it.

root@metrics:~# snap list
Name        Version   Rev    Tracking       Publisher   Notes
core20      20220719  1587   latest/stable  canonical✓  base
prometheus  2.32.1    73     20.04/edge     canonical✓  -
snapd       2.56.2    16292  latest/stable  canonical✓  snapd
root@metrics:~# lsblk
NAME   MAJ:MIN RM   SIZE RO TYPE MOUNTPOINT
loop0    7:0    0    62M  1 loop 
loop1    7:1    0    62M  1 loop 
loop2    7:2    0    80M  1 loop 
loop3    7:3    0    47M  1 loop 
sda      8:0    0 232.9G  0 disk 
├─sda1   8:1    0     1M  0 part 
├─sda2   8:2    0    24G  0 part 
├─sda3   8:3    0     2G  0 part 
└─sda4   8:4    0   128G  0 part 
sdb      8:16   0 232.9G  0 disk 
├─sdb1   8:17   0     1M  0 part 
├─sdb2   8:18   0    24G  0 part 
├─sdb3   8:19   0     2G  0 part 
└─sdb4   8:20   0   128G  0 part 
sdc      8:32   0   2.7T  0 disk 
├─sdc1   8:33   0   2.7T  0 part 
└─sdc9   8:41   0     8M  0 part

In the above, the `sda`, `sdb` and `sdc` devices are leaked from the host :/

Comparing the cgroup files for `metrics` with those of another container (`squid`), we see that `io.stat` is populated only for `metrics`:

root@xeon:~# grep . /sys/fs/cgroup/lxc.payload.metrics/io.*
/sys/fs/cgroup/lxc.payload.metrics/io.pressure:some avg10=0.00 avg60=0.00 avg300=0.00 total=10851611
/sys/fs/cgroup/lxc.payload.metrics/io.pressure:full avg10=0.00 avg60=0.00 avg300=0.00 total=3709916
/sys/fs/cgroup/lxc.payload.metrics/io.prio.class:no-change
/sys/fs/cgroup/lxc.payload.metrics/io.stat:8:16 
/sys/fs/cgroup/lxc.payload.metrics/io.weight:default 100
root@xeon:~# grep . /sys/fs/cgroup/lxc.payload.squid/io.*
/sys/fs/cgroup/lxc.payload.squid/io.pressure:some avg10=0.00 avg60=0.00 avg300=0.00 total=690648
/sys/fs/cgroup/lxc.payload.squid/io.pressure:full avg10=0.00 avg60=0.00 avg300=0.00 total=616654
/sys/fs/cgroup/lxc.payload.squid/io.prio.class:no-change
/sys/fs/cgroup/lxc.payload.squid/io.weight:default 100

Across all containers, only `metrics` has content in its `io.stat` file:

root@xeon:~# grep . /sys/fs/cgroup/lxc.payload.*/io.stat
/sys/fs/cgroup/lxc.payload.metrics/io.stat:8:16
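
Worth noting: "unexpected EOF" is exactly the error Go's fmt scanning functions return when the input ends before the whole format string has been matched, so a counter-less entry like this bare `8:16` would trip any scanner that expects the full set of counters. A minimal sketch reproducing the message (the format string here is illustrative; I haven't checked it against the actual LXD parsing code):

package main

import "fmt"

func main() {
	var major, minor, rbytes uint64

	// A complete entry satisfies the whole format string.
	_, err := fmt.Sscanf("8:16 rbytes=352256", "%d:%d rbytes=%d", &major, &minor, &rbytes)
	fmt.Println(err) // <nil>

	// A counter-less entry, like the bare "8:16 " above, runs out of
	// input before "rbytes=" can be matched.
	_, err = fmt.Sscanf("8:16 ", "%d:%d rbytes=%d", &major, &minor, &rbytes)
	fmt.Println(err) // unexpected EOF
}
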
tomponline commented 2 years ago

At the very least we need to improve the quality of the errors here.
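
One cheap improvement (a sketch of the idea only, not a patch against the real code path) would be to wrap the scan error with the offending input, so the log points straight at the malformed line:

package main

import "fmt"

func main() {
	line := "8:16 "
	var major, minor, rbytes uint64
	_, err := fmt.Sscanf(line, "%d:%d rbytes=%d", &major, &minor, &rbytes)
	if err != nil {
		// %q quotes the raw input, so trailing whitespace and
		// truncation are visible in the log message.
		fmt.Println(fmt.Errorf("failed to parse io.stat line %q: %w", line, err))
		// Output: failed to parse io.stat line "8:16 ": unexpected EOF
	}
}
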

simondeziel commented 2 years ago

@tomp, it happened again (no surprise), but I thought this was worth capturing:

# Captured while the problematic msg="Failed to get disk stats" err="unexpected EOF" is being logged:

root@xeon:~# grep -n . /sys/fs/cgroup/lxc.payload.*/io.stat
/sys/fs/cgroup/lxc.payload.metrics/io.stat:1:8:0 8:16 rbytes=352256 wbytes=0 rios=10 wios=0 dbytes=0 dios=0
/sys/fs/cgroup/lxc.payload.metrics/io.stat:2:7:1 rbytes=206848 wbytes=0 rios=10 wios=0 dbytes=0 dios=0
/sys/fs/cgroup/lxc.payload.metrics/io.stat:3:7:2 rbytes=2048 wbytes=0 rios=1 wios=0 dbytes=0 dios=0
root@xeon:~# lxc restart metrics
root@xeon:~# sleep 30
root@xeon:~# grep -n . /sys/fs/cgroup/lxc.payload.*/io.stat
root@xeon:~#
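
The first captured line is telling: `8:0 8:16 rbytes=...` packs a counter-less `8:0` entry and a normal `8:16` entry onto a single line, so a parser that assumes one complete entry per line will either error out or misattribute counters. A tolerant parse could treat every token without an `=` as a `major:minor` device id that starts a new entry; the `parseIOStat` helper below is a hypothetical sketch of that direction, not necessarily how this was fixed upstream:

package main

import (
	"fmt"
	"strconv"
	"strings"
)

// parseIOStat is a hypothetical, tolerant io.stat parser: any token
// without "=" is taken as a "major:minor" device id starting a new
// entry, and the key=value tokens that follow attach to that device.
func parseIOStat(content string) map[string]map[string]uint64 {
	stats := map[string]map[string]uint64{}
	cur := ""
	for _, tok := range strings.Fields(content) {
		key, val, isCounter := strings.Cut(tok, "=")
		if !isCounter {
			// New device entry, possibly with no counters at all.
			cur = tok
			stats[cur] = map[string]uint64{}
			continue
		}
		if cur == "" {
			continue // counter with no preceding device; ignore
		}
		if n, err := strconv.ParseUint(val, 10, 64); err == nil {
			stats[cur][key] = n
		}
	}
	return stats
}

func main() {
	// The joined "8:0 8:16 ..." line from the capture above no longer
	// aborts the scrape: 8:0 simply ends up with no counters.
	out := parseIOStat("8:0 8:16 rbytes=352256 wbytes=0\n7:1 rbytes=206848")
	fmt.Println(out)
	// map[7:1:map[rbytes:206848] 8:0:map[] 8:16:map[rbytes:352256 wbytes:0]]
}
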