AliyunContainerService / gpushare-scheduler-extender

GPU Sharing Scheduler for Kubernetes Cluster
Apache License 2.0
1.36k stars 303 forks source link

如何使用ALIYUN_COM_GPU_SPECIAL_IDX指定主机运行,我使用主机名不生效 #179

Closed 1003111014 closed 2 years ago

1003111014 commented 2 years ago

因调度不合理,大量算法都被调度到了同一台机器,想通过 ALIYUN_COM_GPU_SPECIAL_IDX 指定主机运行,但是在算法的环境变量中指定后并没有生效。日志如下: [ ] 2022/06/28 07:22:59 begin to sync gpushare pod p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model [ ] 2022/06/28 07:22:59 Add or update pod info: &Pod{ObjectMeta:k8s_io_apimachinery_pkg_apis_meta_v1.ObjectMeta{Name:p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj,GenerateName:p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-,Namespace:ai-model,SelfLink:,UID:4f0400bc-98b5-4cf6-9385-39ca1bf8eb3e,ResourceVersion:25778183,Generation:0,CreationTimestamp:2022-06-28 07:22:59 +0000 UTC,DeletionTimestamp:<nil>,DeletionGracePeriodSeconds:nil,Labels:map[string]string{app: 00c54b7a-d015-11ec-8db7-12cc16fb82ca,pod-template-hash: 68bb8bc7fb,},Annotations:map[string]string{cattle.io/timestamp: 2022-06-28T07:22:02Z,field.cattle.io/ports: [[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]],},OwnerReferences:[{apps/v1 ReplicaSet p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb cc257989-c70d-4489-9021-ea5b8ad5dc4d 0xc420752f0a 0xc420752f0b}],Finalizers:[],ClusterName:,Initializers:nil,},Spec:PodSpec{Volumes:[{default-token-zt2rw {nil nil nil nil nil SecretVolumeSource{SecretName:default-token-zt2rw,Items:[],DefaultMode:*420,Optional:nil,} nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil}}],Containers:[{c-00c54b7a-d015-11ec-8db7-12cc16fb82ca registry.kilox.cn/ai/offline_function_tensorrt/air_switch:amd-2.0 [] [] [{port-7070 0 7070 TCP }] [] [{ALIYUN_COM_GPU_SPECIAL_IDX worker1 nil} {DATA_CALLBACK_URL http://ai-service.bps:8000/ai/model/process/data nil} {MINIO_ACCESS_KEY AKIAIOSFODNN7EXAMGTF nil} {MINIO_BUCKET_NAME aibucket nil} {MINIO_ENDPOINT minio-k9jzg.bps:9000 nil} {MINIO_SECRET_KEY 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 nil} {MINIO_SECURE False nil} {NVIDIA_VISIBLE_DEVICES all nil}] {map[aliyun.com/gpu-mem:{{768 0} 
{<nil>} 768 DecimalSI}] map[aliyun.com/gpu-mem:{{768 0} {<nil>} 768 DecimalSI}]} [{default-token-zt2rw true /var/run/secrets/kubernetes.io/serviceaccount <nil>}] [] nil nil nil /dev/termination-log File IfNotPresent SecurityContext{Capabilities:&Capabilities{Add:[],Drop:[],},Privileged:nil,SELinuxOptions:nil,RunAsUser:nil,RunAsNonRoot:nil,ReadOnlyRootFilesystem:nil,AllowPrivilegeEscalation:nil,RunAsGroup:nil,} false false false}],RestartPolicy:Always,TerminationGracePeriodSeconds:*30,ActiveDeadlineSeconds:nil,DNSPolicy:ClusterFirst,NodeSelector:map[string]string{},ServiceAccountName:default,DeprecatedServiceAccount:default,NodeName:,HostNetwork:false,HostPID:false,HostIPC:false,SecurityContext:&PodSecurityContext{SELinuxOptions:nil,RunAsUser:nil,RunAsNonRoot:nil,SupplementalGroups:[],FSGroup:nil,RunAsGroup:nil,Sysctls:[],},ImagePullSecrets:[],Hostname:,Subdomain:,Affinity:nil,SchedulerName:default-scheduler,InitContainers:[],AutomountServiceAccountToken:nil,Tolerations:[{node.kubernetes.io/not-ready Exists NoExecute 0xc4207537f0} {node.kubernetes.io/unreachable Exists NoExecute 0xc420753810}],HostAliases:[],PriorityClassName:,Priority:*0,DNSConfig:nil,ShareProcessNamespace:nil,ReadinessGates:[],},Status:PodStatus{Phase:Pending,Conditions:[],Message:,Reason:,HostIP:,PodIP:,StartTime:<nil>,ContainerStatuses:[],QOSClass:BestEffort,InitContainerStatuses:[],NominatedNodeName:,},} [ ] 2022/06/28 07:22:59 Node map[worker1:0xc420323600 worker2:0xc420632580] [ ] 2022/06/28 07:22:59 pod p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model is not assigned to any node, skip [ ] 2022/06/28 07:22:59 end processNextWorkItem() [ ] 2022/06/28 07:22:59 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model and old status is Pending, new status is Pending; its old annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z 
field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:22:59 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model and old status is Pending, new status is Pending; its old annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:00 begin processNextWorkItem() [ ] 2022/06/28 07:23:01 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model and old status is Pending, new status is Pending; its old annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cni.projectcalico.org/podIP:10.42.4.203/32 cni.projectcalico.org/podIPs:10.42.4.203/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T07:22:02Z] [ ] 2022/06/28 07:23:02 No need to update pod name 
p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model and old status is Pending, new status is Running; its old annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z cni.projectcalico.org/podIP:10.42.4.203/32 cni.projectcalico.org/podIPs:10.42.4.203/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-28T07:22:02Z cni.projectcalico.org/podIP:10.42.4.203/32 cni.projectcalico.org/podIPs:10.42.4.203/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:03 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model and old status is Running, new status is Running; its old annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP:10.42.4.202/32 cni.projectcalico.org/podIPs:10.42.4.202/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:03 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model and old status is Running, new status is Running; its old annotation map[field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] 
cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs:] and new annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:04 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-68bb8bc7fb-flqxj in ns ai-model and old status is Running, new status is Running; its old annotation map[field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T07:22:02Z cni.projectcalico.org/podIP:10.42.4.203/32 cni.projectcalico.org/podIPs:10.42.4.203/32] and new annotation map[field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T07:22:02Z cni.projectcalico.org/podIP:10.42.4.203/32 cni.projectcalico.org/podIPs:10.42.4.203/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model and old status is Running, new status is Running; its old annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: 
field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:04 No need to update pod name p-d049b5a6-d025-11ec-9bf1-12cc16fb82ca-bd84d675b-bkn5j in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.4.180/32 cni.projectcalico.org/podIPs:10.42.4.180/32] and new annotation map[cni.projectcalico.org/podIP:10.42.4.180/32 cni.projectcalico.org/podIPs:10.42.4.180/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-fa4f5964-d025-11ec-b57d-12cc16fb82ca-56cbfbb66b-ckmmh in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.4.179/32 cni.projectcalico.org/podIPs:10.42.4.179/32] and new annotation map[cni.projectcalico.org/podIP:10.42.4.179/32 cni.projectcalico.org/podIPs:10.42.4.179/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-b521d0ac-d4c0-11ec-8214-4a2f8bc05280-77b7c885c8-xl5m5 in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.4.198/32 cni.projectcalico.org/podIPs:10.42.4.198/32] and new annotation map[cni.projectcalico.org/podIP:10.42.4.198/32 cni.projectcalico.org/podIPs:10.42.4.198/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-86647828-d4c0-11ec-b694-4a2f8bc05280-fb78c79bd-2w78j in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIPs:10.42.4.196/32 cni.projectcalico.org/podIP:10.42.4.196/32] and new annotation map[cni.projectcalico.org/podIP:10.42.4.196/32 cni.projectcalico.org/podIPs:10.42.4.196/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-b9e39a20-d4f3-11ec-a7f0-121abc847da5-5cc9967657-rxlp2 in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.4.190/32 
cni.projectcalico.org/podIPs:10.42.4.190/32] and new annotation map[cni.projectcalico.org/podIPs:10.42.4.190/32 cni.projectcalico.org/podIP:10.42.4.190/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-775af1d0-d025-11ec-b79a-12cc16fb82ca-694bddd8c8-22q9p in ns ai-model and old status is Running, new status is Running; its old annotation map[cattle.io/timestamp:2022-06-15T08:43:32Z cni.projectcalico.org/podIP:10.42.4.197/32 cni.projectcalico.org/podIPs:10.42.4.197/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-775af1d0-d025-11ec-b79a-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-15T08:43:32Z cni.projectcalico.org/podIP:10.42.4.197/32 cni.projectcalico.org/podIPs:10.42.4.197/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-775af1d0-d025-11ec-b79a-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:04 No need to update pod name p-07e7c748-d024-11ec-b79a-12cc16fb82ca-5d5cfcf594-m59rv in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.3.117/32 cni.projectcalico.org/podIPs:10.42.3.117/32] and new annotation map[cni.projectcalico.org/podIP:10.42.3.117/32 cni.projectcalico.org/podIPs:10.42.3.117/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-56c0cd7c-d026-11ec-9141-12cc16fb82ca-59959dd959-45z8g in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.3.119/32 cni.projectcalico.org/podIPs:10.42.3.119/32] and new annotation map[cni.projectcalico.org/podIP:10.42.3.119/32 cni.projectcalico.org/podIPs:10.42.3.119/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-ad4a9c10-d5c4-11ec-a679-664243c08fda-57df77d867-b62lx in ns ai-model and old status is Running, new status is Running; its old annotation 
map[cni.projectcalico.org/podIP:10.42.3.115/32 cni.projectcalico.org/podIPs:10.42.3.115/32] and new annotation map[cni.projectcalico.org/podIP:10.42.3.115/32 cni.projectcalico.org/podIPs:10.42.3.115/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-026d8cd8-e0b6-11ec-b0ec-4a6aa9346189-84465dcdd-jpl2g in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.4.181/32 cni.projectcalico.org/podIPs:10.42.4.181/32] and new annotation map[cni.projectcalico.org/podIP:10.42.4.181/32 cni.projectcalico.org/podIPs:10.42.4.181/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-7dd3830c-f6ad-11ec-ae06-5612b7ec9a25-7d6f44775f-pmdnc in ns ai-model and old status is Running, new status is Running; its old annotation map[cni.projectcalico.org/podIP:10.42.3.120/32 cni.projectcalico.org/podIPs:10.42.3.120/32] and new annotation map[cni.projectcalico.org/podIP:10.42.3.120/32 cni.projectcalico.org/podIPs:10.42.3.120/32] [ ] 2022/06/28 07:23:04 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-cz8pl in ns ai-model and old status is Running, new status is Running; its old annotation map[field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP:10.42.4.201/32 cni.projectcalico.org/podIPs:10.42.4.201/32] and new annotation map[cni.projectcalico.org/podIP:10.42.4.201/32 cni.projectcalico.org/podIPs:10.42.4.201/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T06:57:35Z] [ ] 2022/06/28 07:23:04 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model and old status is Running, new status is 
Running; its old annotation map[cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T06:57:35Z] and new annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] [ ] 2022/06/28 07:23:04 delete pod p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model [ ] 2022/06/28 07:23:04 begin to sync gpushare pod p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model [ ] 2022/06/28 07:23:04 pod p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model has been deleted. [ ] 2022/06/28 07:23:04 Remove pod info: &Pod{ObjectMeta:k8s_io_apimachinery_pkg_apis_meta_v1.ObjectMeta{Name:p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw,GenerateName:p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-,Namespace:ai-model,SelfLink:,UID:c5d07b91-4038-48e7-8727-f06d36154bd7,ResourceVersion:25778231,Generation:0,CreationTimestamp:2022-06-28 07:21:57 +0000 UTC,DeletionTimestamp:2022-06-28 07:22:32 +0000 UTC,DeletionGracePeriodSeconds:*0,Labels:map[string]string{app: 00c54b7a-d015-11ec-8db7-12cc16fb82ca,pod-template-hash: 6846dd57f9,},Annotations:map[string]string{cattle.io/timestamp: 2022-06-28T06:57:35Z,cni.projectcalico.org/podIP: ,cni.projectcalico.org/podIPs: ,field.cattle.io/ports: [[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]],},OwnerReferences:[{apps/v1 ReplicaSet p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9 1817646b-6d2f-40cd-bd28-c5ef3f5a1a6a 0xc42078d59a 
0xc42078d59b}],Finalizers:[],ClusterName:,Initializers:nil,},Spec:PodSpec{Volumes:[{default-token-zt2rw {nil nil nil nil nil SecretVolumeSource{SecretName:default-token-zt2rw,Items:[],DefaultMode:*420,Optional:nil,} nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil nil}}],Containers:[{c-00c54b7a-d015-11ec-8db7-12cc16fb82ca registry.kilox.cn/ai/offline_function_tensorrt/air_switch:amd-2.0 [] [] [{port-7070 0 7070 TCP }] [] [{ALIYUN_COM_GPU_SPECIAL_IDX 0xc420323600 nil} {DATA_CALLBACK_URL http://ai-service.bps:8000/ai/model/process/data nil} {MINIO_ACCESS_KEY AKIAIOSFODNN7EXAMGTF nil} {MINIO_BUCKET_NAME aibucket nil} {MINIO_ENDPOINT minio-k9jzg.bps:9000 nil} {MINIO_SECRET_KEY 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 nil} {MINIO_SECURE False nil} {NVIDIA_VISIBLE_DEVICES all nil}] {map[aliyun.com/gpu-mem:{{768 0} {<nil>} 768 DecimalSI}] map[aliyun.com/gpu-mem:{{768 0} {<nil>} 768 DecimalSI}]} [{default-token-zt2rw true /var/run/secrets/kubernetes.io/serviceaccount <nil>}] [] nil nil nil /dev/termination-log File IfNotPresent SecurityContext{Capabilities:&Capabilities{Add:[],Drop:[],},Privileged:nil,SELinuxOptions:nil,RunAsUser:nil,RunAsNonRoot:nil,ReadOnlyRootFilesystem:nil,AllowPrivilegeEscalation:nil,RunAsGroup:nil,} false false false}],RestartPolicy:Always,TerminationGracePeriodSeconds:*30,ActiveDeadlineSeconds:nil,DNSPolicy:ClusterFirst,NodeSelector:map[string]string{},ServiceAccountName:default,DeprecatedServiceAccount:default,NodeName:worker2,HostNetwork:false,HostPID:false,HostIPC:false,SecurityContext:&PodSecurityContext{SELinuxOptions:nil,RunAsUser:nil,RunAsNonRoot:nil,SupplementalGroups:[],FSGroup:nil,RunAsGroup:nil,Sysctls:[],},ImagePullSecrets:[],Hostname:,Subdomain:,Affinity:nil,SchedulerName:default-scheduler,InitContainers:[],AutomountServiceAccountToken:nil,Tolerations:[{node.kubernetes.io/not-ready Exists NoExecute 0xc420b1a0b0} {node.kubernetes.io/unreachable Exists NoExecute 
0xc420b1a0f0}],HostAliases:[],PriorityClassName:,Priority:*0,DNSConfig:nil,ShareProcessNamespace:nil,ReadinessGates:[],},Status:PodStatus{Phase:Running,Conditions:[{Initialized True 0001-01-01 00:00:00 +0000 UTC 2022-06-28 07:21:57 +0000 UTC } {Ready False 0001-01-01 00:00:00 +0000 UTC 2022-06-28 07:23:03 +0000 UTC ContainersNotReady containers with unready status: [c-00c54b7a-d015-11ec-8db7-12cc16fb82ca]} {ContainersReady False 0001-01-01 00:00:00 +0000 UTC 2022-06-28 07:23:03 +0000 UTC ContainersNotReady containers with unready status: [c-00c54b7a-d015-11ec-8db7-12cc16fb82ca]} {PodScheduled True 0001-01-01 00:00:00 +0000 UTC 2022-06-28 07:21:57 +0000 UTC }],Message:,Reason:,HostIP:192.168.30.15,PodIP:10.42.4.202,StartTime:2022-06-28 07:21:57 +0000 UTC,ContainerStatuses:[{c-00c54b7a-d015-11ec-8db7-12cc16fb82ca {nil nil ContainerStateTerminated{ExitCode:137,Signal:0,Reason:Error,Message:,StartedAt:2022-06-28 07:21:58 +0000 UTC,FinishedAt:2022-06-28 07:23:02 +0000 UTC,ContainerID:docker://08a1a9341f692f9b4948de209faa91fd59429d3a087af1676a1bd69bb50ca013,}} {nil nil nil} false 0 registry.kilox.cn/ai/offline_function_tensorrt/air_switch:amd-2.0 docker-pullable://registry.kilox.cn/ai/offline_function_tensorrt/air_switch@sha256:6bfe6e755adbf2462ea7df3c6277d610fb2ccc2076b809dd7fe838aed8cac708 docker://08a1a9341f692f9b4948de209faa91fd59429d3a087af1676a1bd69bb50ca013}],QOSClass:BestEffort,InitContainerStatuses:[],NominatedNodeName:,},} [ ] 2022/06/28 07:23:04 Node map[worker2:0xc420632580 worker1:0xc420323600] [ ] 2022/06/28 07:23:04 GetNodeInfo() uses the existing nodeInfo for worker2 [ ] 2022/06/28 07:23:04 Pod p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-pxhfw in ns ai-model is not set the GPU ID -1 in node worker2 [ ] 2022/06/28 07:23:04 end processNextWorkItem() [ ] 2022/06/28 07:23:05 begin processNextWorkItem() [ ] 2022/06/28 07:23:30 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-cz8pl in ns ai-model and old status is Running, new 
status is Running; its old annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP:10.42.4.201/32 cni.projectcalico.org/podIPs:10.42.4.201/32 field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]] cattle.io/timestamp:2022-06-28T06:57:35Z] [ ] 2022/06/28 07:23:30 No need to update pod name p-00c54b7a-d015-11ec-8db7-12cc16fb82ca-6846dd57f9-cz8pl in ns ai-model and old status is Running, new status is Running; its old annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]] and new annotation map[cattle.io/timestamp:2022-06-28T06:57:35Z cni.projectcalico.org/podIP: cni.projectcalico.org/podIPs: field.cattle.io/ports:[[{"containerPort":7070,"dnsName":"p-00c54b7a-d015-11ec-8db7-12cc16fb82ca","hostPort":0,"kind":"ClusterIP","name":"port-7070","protocol":"TCP","sourcePort":0}]]]