Open bilbilmyc opened 1 month ago
k8s v1.23.7
root@g007:/var/lib/kubelet# docker -v
Docker version 20.10.24, build 297e128
root@g007:/var/lib/kubelet# containerd -v
containerd github.com/containerd/containerd v1.6.20 2806fc1057397dbaeefbea0e4e17bddfbd388f38
root@pt13:~# kubectl -n kube-system get ds nvidia-device-plugin-daemonset -o yaml | grep image {"apiVersion":"apps/v1","kind":"DaemonSet","metadata":{"annotations":{},"name":"nvidia-device-plugin-daemonset","namespace":"kube-system"},"spec":{"selector":{"matchLabels":{"name":"nvidia-device-plugin-ds"}},"template":{"metadata":{"labels":{"name":"nvidia-device-plugin-ds"}},"spec":{"affinity":{"nodeAffinity":{"requiredDuringSchedulingIgnoredDuringExecution":{"nodeSelectorTerms":[{"matchExpressions":[{"key":"no-gpu","operator":"NotIn","values":["enable"]}]}]}}},"containers":[{"env":[{"name":"FAIL_ON_INIT_ERROR","value":"false"}],"image":"nvcr.io/nvidia/k8s-device-plugin:v0.15.0","name":"nvidia-device-plugin-ctr","securityContext":{"allowPrivilegeEscalation":true,"capabilities":{"drop":["ALL"]}},"volumeMounts":[{"mountPath":"/var/lib/kubelet/device-plugins","name":"device-plugin"}]}],"priorityClassName":"system-node-critical","tolerations":[{"effect":"NoSchedule","key":"nvidia.com/gpu","operator":"Exists"}],"volumes":[{"hostPath":{"path":"/var/lib/kubelet/device-plugins"},"name":"device-plugin"}]}},"updateStrategy":{"type":"RollingUpdate"}}} image: nvcr.io/nvidia/k8s-device-plugin:v0.16.2 imagePullPolicy: IfNotPresent
root@pt13:~# kubectl -n shuzhifengqiao1 exec -it smiling-viva-6124-864b4f6cd7-r747r bash
tom@smiling-viva-6124-864b4f6cd7-r747r:~$ nvidia-smi
Failed to initialize NVML: Unknown Error
The kubelet's cpuManagerPolicy is static:
root@g007:/var/lib/kubelet# cat /var/lib/kubelet/config.yaml | grep cpuManagerPolicy
cpuManagerPolicy: static
Check this: https://github.com/NVIDIA/nvidia-container-toolkit/issues/48
Environment:
k8s
k8s v1.23.7
docker
containerd
device-plugin
exec pod
kubelet
The kubelet's cpuManagerPolicy is static