canonical / lxd

Powerful system container and virtual machine manager
https://canonical.com/lxd
GNU Affero General Public License v3.0

Apparmor confinement for QEMU #6930

Closed: simondeziel closed this issue 4 years ago

simondeziel commented 4 years ago

Required information

config: {}
api_extensions:
- storage_zfs_remove_snapshots
- container_host_shutdown_timeout
- container_stop_priority
- container_syscall_filtering
- auth_pki
- container_last_used_at
- etag
- patch
- usb_devices
- https_allowed_credentials
- image_compression_algorithm
- directory_manipulation
- container_cpu_time
- storage_zfs_use_refquota
- storage_lvm_mount_options
- network
- profile_usedby
- container_push
- container_exec_recording
- certificate_update
- container_exec_signal_handling
- gpu_devices
- container_image_properties
- migration_progress
- id_map
- network_firewall_filtering
- network_routes
- storage
- file_delete
- file_append
- network_dhcp_expiry
- storage_lvm_vg_rename
- storage_lvm_thinpool_rename
- network_vlan
- image_create_aliases
- container_stateless_copy
- container_only_migration
- storage_zfs_clone_copy
- unix_device_rename
- storage_lvm_use_thinpool
- storage_rsync_bwlimit
- network_vxlan_interface
- storage_btrfs_mount_options
- entity_description
- image_force_refresh
- storage_lvm_lv_resizing
- id_map_base
- file_symlinks
- container_push_target
- network_vlan_physical
- storage_images_delete
- container_edit_metadata
- container_snapshot_stateful_migration
- storage_driver_ceph
- storage_ceph_user_name
- resource_limits
- storage_volatile_initial_source
- storage_ceph_force_osd_reuse
- storage_block_filesystem_btrfs
- resources
- kernel_limits
- storage_api_volume_rename
- macaroon_authentication
- network_sriov
- console
- restrict_devlxd
- migration_pre_copy
- infiniband
- maas_network
- devlxd_events
- proxy
- network_dhcp_gateway
- file_get_symlink
- network_leases
- unix_device_hotplug
- storage_api_local_volume_handling
- operation_description
- clustering
- event_lifecycle
- storage_api_remote_volume_handling
- nvidia_runtime
- container_mount_propagation
- container_backup
- devlxd_images
- container_local_cross_pool_handling
- proxy_unix
- proxy_udp
- clustering_join
- proxy_tcp_udp_multi_port_handling
- network_state
- proxy_unix_dac_properties
- container_protection_delete
- unix_priv_drop
- pprof_http
- proxy_haproxy_protocol
- network_hwaddr
- proxy_nat
- network_nat_order
- container_full
- candid_authentication
- backup_compression
- candid_config
- nvidia_runtime_config
- storage_api_volume_snapshots
- storage_unmapped
- projects
- candid_config_key
- network_vxlan_ttl
- container_incremental_copy
- usb_optional_vendorid
- snapshot_scheduling
- container_copy_project
- clustering_server_address
- clustering_image_replication
- container_protection_shift
- snapshot_expiry
- container_backup_override_pool
- snapshot_expiry_creation
- network_leases_location
- resources_cpu_socket
- resources_gpu
- resources_numa
- kernel_features
- id_map_current
- event_location
- storage_api_remote_volume_snapshots
- network_nat_address
- container_nic_routes
- rbac
- cluster_internal_copy
- seccomp_notify
- lxc_features
- container_nic_ipvlan
- network_vlan_sriov
- storage_cephfs
- container_nic_ipfilter
- resources_v2
- container_exec_user_group_cwd
- container_syscall_intercept
- container_disk_shift
- storage_shifted
- resources_infiniband
- daemon_storage
- instances
- image_types
- resources_disk_sata
- clustering_roles
- images_expiry
- resources_network_firmware
- backup_compression_algorithm
- ceph_data_pool_name
- container_syscall_intercept_mount
- compression_squashfs
- container_raw_mount
- container_nic_routed
- container_syscall_intercept_mount_fuse
- container_disk_ceph
- virtual-machines
- image_profiles
- clustering_architecture
- resources_disk_id
- storage_lvm_stripes
- vm_boot_priority
- unix_hotplug_devices
- api_filtering
- instance_nic_network
- clustering_sizing
api_status: stable
api_version: "1.0"
auth: trusted
public: false
auth_methods:
- tls
environment:
  addresses: []
  architectures:
  - x86_64
  - i686
  certificate: |
    -----BEGIN CERTIFICATE-----
    -----END CERTIFICATE-----
  certificate_fingerprint: fingerprint
  driver: lxc
  driver_version: 3.2.1
  kernel: Linux
  kernel_architecture: x86_64
  kernel_features:
    netnsid_getifaddrs: "true"
    seccomp_listener: "true"
    seccomp_listener_continue: "true"
    shiftfs: "false"
    uevent_injection: "true"
    unpriv_fscaps: "true"
  kernel_version: 5.3.0-40-generic
  lxc_features:
    cgroup2: "false"
    mount_injection_file: "true"
    network_gateway_device_route: "true"
    network_ipvlan: "true"
    network_l2proxy: "true"
    network_phys_macvlan_mtu: "true"
    network_veth_router: "true"
    seccomp_notify: "true"
  project: default
  server: lxd
  server_clustered: false
  server_name: simon-lemur
  server_pid: 12273
  server_version: "3.21"
  storage: zfs
  storage_version: 0.8.1-1ubuntu14.3

Issue description

It would be nice to have the QEMU process confined by AppArmor, as libvirt does for its guests.
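For reference, whether a given QEMU process is confined can be checked by reading its AppArmor label from procfs: /proc/<pid>/attr/current reports "unconfined" for an unconfined process, and the profile name plus enforcement mode for a confined one. A minimal Go sketch of that check (not part of LXD, just for illustration):

package main

import (
	"fmt"
	"os"
	"strings"
)

// Print the AppArmor confinement label of a process, e.g. a running
// qemu-system-x86_64 found with pgrep. Unconfined processes report
// "unconfined"; confined ones report "<profile name> (enforce)".
func main() {
	if len(os.Args) != 2 {
		fmt.Fprintln(os.Stderr, "usage: aa-label <pid>")
		os.Exit(1)
	}
	label, err := os.ReadFile("/proc/" + os.Args[1] + "/attr/current")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	fmt.Println(strings.TrimSpace(string(label)))
}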

I did a trivial experiment using LXD's latest snap and came up with this (toy) AppArmor profile, which is enough to allow lxc start/stop/console/shell against the VM:

# Author: Simon Deziel
#include <tunables/global>

@{SNAPLXD}=/snap/lxd/13487
@{LXDCOMMON}=/var/snap/lxd/common

profile lxd-qemu /snap/lxd/*/bin/qemu-system-x86_64 {
  #include <abstractions/base>

  capability setgid,
  capability setuid,
  capability sys_admin,
  capability sys_chroot,

  # required for reading disk images
  capability dac_override,
  capability dac_read_search,

  @{SNAPLXD}/lib/lib*.so*              mr,
  @{SNAPLXD}/lib/**/lib*.so*           mr,
  @{SNAPLXD}/share/qemu/OVMF_CODE.fd   kr,
  @{SNAPLXD}/share/qemu/kvmvapic.bin   r,
  @{SNAPLXD}/share/qemu/efi-virtio.rom r,

  # host devices
  /dev/net/tun rw,
  /dev/kvm  rw,
  /dev/ptmx rw,
  /dev/pts/[0-9]*   rw,
  /dev/vhost-net    rw,
  /dev/vhost-vsock  rw,

  # for gathering information about available host resources
  /sys/devices/system/cpu/ r,
  /sys/devices/system/node/ r,
  /sys/devices/system/node/node[0-9]*/meminfo r,
  /sys/module/vhost/parameters/max_mem_regions r,
  owner @{PROC}/@{pid}/net/psched r,

  # user switching
  /var/lib/snapd/hostfs/etc/group            r,
  /var/lib/snapd/hostfs/etc/nsswitch.conf    r,
  /var/lib/snapd/hostfs/etc/passwd           r,

  # XXX: this will need a helper
  # per-VM
  @{LXDCOMMON}/lxd/logs/*/qemu.conf                              r,
  @{LXDCOMMON}/lxd/logs/*/qemu.log                               w,
  @{LXDCOMMON}/lxd/logs/*/qemu.pid                               kw,
  @{LXDCOMMON}/lxd/logs/*/qemu.monitor                           w,
  @{LXDCOMMON}/lxd/storage-pools/*/virtual-machines/*/qemu.nvram krw,
  @{LXDCOMMON}/lxd/storage-pools/*/virtual-machines/*/*.iso      kr,
  @{LXDCOMMON}/lxd/storage-pools/*/virtual-machines/*/config/    r,
  @{LXDCOMMON}/lxd/storage-pools/*/virtual-machines/*/config/**  r,
  @{LXDCOMMON}/lxd/storage-pools/*/virtual-machines/*/config.iso w,
  owner /dev/zd[0-9]*                                            krw,
}
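For manual testing, such a profile can be loaded (and reloaded after edits) with apparmor_parser -r before starting the VM. A minimal Go sketch of that step, with a hypothetical on-disk path, for when it needs to be driven from code:

package apparmor

import (
	"os"
	"os/exec"
)

// loadProfile is a sketch, not LXD's actual loading code: it writes a
// generated profile to disk and (re)loads it into the kernel.
// apparmor_parser's -r flag replaces the profile if an older version is
// already loaded, so this is safe to call on every VM start.
func loadProfile(path, content string) error {
	if err := os.WriteFile(path, []byte(content), 0o600); err != nil {
		return err
	}
	return exec.Command("apparmor_parser", "-r", path).Run()
}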

I believe that, as with libvirt, a helper tool will be needed to adapt the generated profile per VM and provide tighter containment.

stgraber commented 4 years ago

LXD has built-in logic to generate and load AppArmor profiles, so there is no need for a helper tool.

The paths will be tricky, though, as we can't hardcode snap paths or make assumptions about symlink targets in there.
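As a rough illustration of that approach (hypothetical names throughout; LXD's actual generation code differs), a per-VM profile can be rendered from a template, with the real QEMU binary path resolved at generation time instead of a snap revision being hardcoded:

package apparmor

import (
	"path/filepath"
	"strings"
	"text/template"
)

// Hypothetical per-instance profile template; only the shape matters
// here, not the rule set. The binary path and log directory are filled
// in per VM at generation time.
const profileTmpl = `profile lxd-qemu-{{ .Name }} {{ .QemuPath }} {
  #include <abstractions/base>
  {{ .LogDir }}/qemu.log w,
  {{ .LogDir }}/qemu.pid kw,
}
`

type instanceData struct {
	Name, QemuPath, LogDir string
}

// renderProfile builds the profile text for one VM. Resolving symlinks
// at generation time keeps the profile pointing at the real binary even
// when a path like /snap/lxd/current moves to a new revision.
func renderProfile(name, qemuBin, logDir string) (string, error) {
	realBin, err := filepath.EvalSymlinks(qemuBin)
	if err != nil {
		return "", err
	}
	var b strings.Builder
	t := template.Must(template.New("profile").Parse(profileTmpl))
	data := instanceData{Name: name, QemuPath: realBin, LogDir: logDir}
	if err := t.Execute(&b, data); err != nil {
		return "", err
	}
	return b.String(), nil
}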

stgraber commented 4 years ago

Dropping this one in favor of #7181