todaygood / container-lab

for docker
0 stars 0 forks source link

docker start reports "no space left on device" #9

Open todaygood opened 5 years ago

todaygood commented 5 years ago
[root@cloud-12-01 ~]# docker start neutron_dhcp_agent
Error response from daemon: invalid header field value "oci runtime error: container_linux.go:247: starting container process caused \"process_linux.go:359: container init caused \\\"rootfs_linux.go:53: mounting \\\\\\\"/run/netns\\\\\\\" to rootfs \\\\\\\"/var/lib/docker/devicemapper/mnt/1262cec4d6a1dc58158ba7ccb9534e30c62d9e95f96cef3912210425fda92b55/rootfs\\\\\\\" at \\\\\\\"/var/lib/docker/devicemapper/mnt/1262cec4d6a1dc58158ba7ccb9534e30c62d9e95f96cef3912210425fda92b55/rootfs/run/netns\\\\\\\" caused \\\\\\\"no space left on device\\\\\\\"\\\"\"\n"
Error: failed to start containers: neutron_dhcp_agent
todaygood commented 5 years ago

从这个错误提示可以看出,是 mount 失败了。

mount 的输出,可以看到:

tmpfs on /run/netns type tmpfs (rw,nosuid,nodev,mode=755)
proc on /run/netns/qrouter-297e9a97-bcac-446e-a3d3-bb1636948cb4 type proc (rw,nosuid,nodev,noexec,relatime)
proc on /run/netns/qrouter-74edb777-78a6-44f0-b6a1-be0d45778377 type proc (rw,nosuid,nodev,noexec,relatime)
proc on /run/netns/qdhcp-51e73c1f-2fdb-4af2-893a-744e12207264 type proc (rw,nosuid,nodev,noexec,relatime)
proc on /run/netns/qdhcp-927d0b06-fd58-4798-91aa-1075cff1bd3f type proc (rw,nosuid,nodev,noexec,relatime)
proc on /run/netns/qdhcp-a37c56ff-9761-4c4e-8731-a9f57e502257 type proc (rw,nosuid,nodev,noexec,relatime)
proc on /run/netns/qdhcp-e61a2655-075e-4a3c-b80e-43cb8982b839 type proc (rw,nosuid,nodev,noexec,relatime)

查询namespace

[root@cloud-sz-control-b12-01 devicemapper]# ip netns 
qdhcp-e61a2655-075e-4a3c-b80e-43cb8982b839 (id: 8)
qdhcp-a37c56ff-9761-4c4e-8731-a9f57e502257 (id: 6)
qdhcp-927d0b06-fd58-4798-91aa-1075cff1bd3f (id: 4)
qdhcp-51e73c1f-2fdb-4af2-893a-744e12207264 (id: 5)
qrouter-74edb777-78a6-44f0-b6a1-be0d45778377 (id: 0)
qrouter-297e9a97-bcac-446e-a3d3-bb1636948cb4 (id: 1)

Solution

删除所有的qdhcp net namespace

ip netns delete qdhcp-xxxx

Refer

https://unix.stackexchange.com/questions/113530/how-to-find-out-namespace-of-a-particular-process

todaygood commented 5 years ago

Similar Issue

有时候启动lbaas agent 失败

[root@cloud-sz-control-b12-03 log]# docker start neutron_lbaas_agent
Error response from daemon: invalid header field value "oci runtime error: container_linux.go:247: starting container process caused \"process_linux.go:359: container init caused \\\"rootfs_linux.go:53: mounting \\\\\\\"/run\\\\\\\" to rootfs \\\\\\\"/var/lib/docker/devicemapper/mnt/ae2e7bee2db25e16f1498df5e9e2f1ff4105a7ec6ab8d1934b2a1fc9e3187495/rootfs\\\\\\\" at \\\\\\\"/var/lib/docker/devicemapper/mnt/ae2e7bee2db25e16f1498df5e9e2f1ff4105a7ec6ab8d1934b2a1fc9e3187495/rootfs/run\\\\\\\" caused \\\\\\\"no space left on device\\\\\\\"\\\"\"\n"
Error: failed to start containers: neutron_lbaas_agent

有时候又会出现 dhcp agent 和 l3 agent 启动不了的情况

[root@cloud-12-03 run]#  docker ps -a |grep -i exit 
f6e5873b7a50        10.54.12.2:5000/bgi/centos-binary-neutron-l3-agent:4.0.0             "kolla_start"       11 weeks ago        Exited (128) 10 minutes ago                       neutron_l3_agent
b50165beeac8        10.54.12.2:5000/bgi/centos-binary-neutron-dhcp-agent:4.0.0           "kolla_start"       11 months ago       Exited (137) 10 minutes ago                       neutron_dhcp_agent
[root@cloud-12-03 run]# docker start neutron_l3_agent
Error response from daemon: invalid header field value "oci runtime error: container_linux.go:247: starting container process caused \"process_linux.go:359: container init caused \\\"rootfs_linux.go:53: mounting \\\\\\\"/run/netns\\\\\\\" to rootfs \\\\\\\"/var/lib/docker/devicemapper/mnt/b0bc0ebc0f24601ee3089ed0d70b758d85d21ba1f60e218ad6c4358aedc8a631/rootfs\\\\\\\" at \\\\\\\"/var/lib/docker/devicemapper/mnt/b0bc0ebc0f24601ee3089ed0d70b758d85d21ba1f60e218ad6c4358aedc8a631/rootfs/run/netns\\\\\\\" caused \\\\\\\"no space left on device\\\\\\\"\\\"\"\n"
Error: failed to start containers: neutron_l3_agent
[root@cloud-12-03 run]# docker start neutron_dhcp_agent
Error response from daemon: invalid header field value "oci runtime error: container_linux.go:247: starting container process caused \"process_linux.go:359: container init caused \\\"rootfs_linux.go:53: mounting \\\\\\\"/run/netns\\\\\\\" to rootfs \\\\\\\"/var/lib/docker/devicemapper/mnt/21b2c422c1344aea3602b0b05bcd26efdc98cb4c5495759cb8da87ce81cbe413/rootfs\\\\\\\" at \\\\\\\"/var/lib/docker/devicemapper/mnt/21b2c422c1344aea3602b0b05bcd26efdc98cb4c5495759cb8da87ce81cbe413/rootfs/run/netns\\\\\\\" caused \\\\\\\"no space left on device\\\\\\\"\\\"\"\n"
Error: failed to start containers: neutron_dhcp_agent

Investigation

查看正常机器上的该容器 docker inspect neutron_lbaas_agent

  "Binds": [
                "/etc/localtime:/etc/localtime:ro",
                "kolla_logs:/var/log/kolla/:rw",
                "/run/netns/:/run/netns/:shared",
                "/run:/run:shared",
                "neutron_metadata_socket:/var/lib/neutron/kolla/:rw",
                "/etc/kolla//neutron-lbaas-agent/:/var/lib/kolla/config_files/:ro"
            ],

也就是说,报错是由 mount A B 失败导致的:A 是宿主机上的源目录(例如 /run/netns),B 是容器 rootfs 中对应的挂载点(例如 rootfs/run/netns)。

[root@cloud-12-03 run]# lsns |grep net 
4026531968 net     1431     1 root  /usr/lib/systemd/systemd --switched-root --system --deserialize 21
[root@cloud-12-03 run]# cd /run/netns/

随便找了一个目录,mount试试看

[root@cloud-12-03 run]# mount /run/netns/   ppp/
mount:  /run/netns is not a block device
[root@cloud-12-03 run]# mount -o bind /run/netns/   ppp/
[root@cloud-12-03 run]# umount ppp/
[root@cloud-12-03 run]# umount /run/netns 
[root@cloud-12-03 run]# ls /run/netns/
[root@cloud-12-03 run]# docker ps -a |grep neutron 
f6e5873b7a50        10.54.12.2:5000/bgi/centos-binary-neutron-l3-agent:4.0.0             "kolla_start"       11 weeks ago        Exited (128) 22 minutes ago                       neutron_l3_agent
ed3621216e90        10.54.12.2:5000/bgi/centos-binary-neutron-metadata-agent:4.0.0       "kolla_start"       11 months ago       Up 17 hours                                       neutron_metadata_agent
b50165beeac8        10.54.12.2:5000/bgi/centos-binary-neutron-dhcp-agent:4.0.0           "kolla_start"       11 months ago       Exited (137) 22 minutes ago                       neutron_dhcp_agent
0d1abcc96e4a        10.54.12.2:5000/bgi/centos-binary-neutron-openvswitch-agent:4.0.0    "kolla_start"       11 months ago       Up 17 hours                                       neutron_openvswitch_agent
11a5cc3cdb44        10.54.12.2:5000/bgi/centos-binary-neutron-server:4.0.0               "kolla_start"       11 months ago       Up 17 hours                                       neutron_server
[root@cloud-12-03 run]# docker start neutron_dhcp_agent
neutron_dhcp_agent
[root@cloud-12-03 run]# docker start neutron_l3_agent
neutron_l3_agent

在 umount /run/netns 之后,发现容器启动成功,并且非常快速。这说明 /run/netns 之前可能已经被 mount 上了,并且存在某种锁导致后续的 mount 失败;当我 umount 之后,这个锁自然被解开,所以 mount 很快成功,进而容器启动成功。

todaygood commented 5 years ago

看rootfs_linux.go 源代码,就比较容易理解错误提示。

// setupRootfs sets up the devices, mount points, and filesystems for use inside a
// new mount namespace.
func setupRootfs(config *configs.Config, console *linuxConsole, pipe io.ReadWriter) (err error) {
    if err := prepareRoot(config); err != nil {
        return newSystemErrorWithCause(err, "preparing rootfs")
    }

    setupDev := needsSetupDev(config)
    for _, m := range config.Mounts {
        for _, precmd := range m.PremountCmds {
            if err := mountCmd(precmd); err != nil {
                return newSystemErrorWithCause(err, "running premount command")
            }
        }
        if err := mountToRootfs(m, config.Rootfs, config.MountLabel); err != nil {
            return newSystemErrorWithCausef(err, "mounting %q to rootfs %q at %q", m.Source, config.Rootfs, m.Destination)
        }

        for _, postcmd := range m.PostmountCmds {
            if err := mountCmd(postcmd); err != nil {
                return newSystemErrorWithCause(err, "running postmount command")
            }
        }
    }