turbonomic / kubeturbo


[TRB-44268]: Add segmentation commodity for the node_group entity #920

Closed: kevinwangcn closed 1 year ago

kevinwangcn commented 1 year ago

Intent

Add a segmentation commodity for the node_group entity. Each node_group entity should sell the segmentation commodity if there is any workload with an anti-affinity term whose topologyKey matches the node_group ID.
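
A minimal sketch of that selling rule, with hypothetical type and function names rather than kubeturbo's actual API:

    // Hypothetical sketch of the rule above; names are illustrative and
    // do not match kubeturbo's actual types.
    package sketch

    // Workload is a stand-in for a discovered workload controller.
    type Workload struct {
        QualifiedName             string // e.g. "Deployment/default/pod-anti-affinity"
        RequiredAntiAffinityTerms []AffinityTerm
    }

    // AffinityTerm is a stand-in for a required pod anti-affinity term.
    type AffinityTerm struct {
        TopologyKey string
    }

    // segmentationKeys returns the segmentation-commodity keys that a
    // node_group defined by labelKey (e.g. "kubernetes.io/zone") should
    // sell: one key per workload whose required anti-affinity term uses
    // that topologyKey. Each commodity is sold with capacity 1 so that
    // replicas of the workload spread across node groups.
    func segmentationKeys(labelKey string, workloads []Workload) []string {
        var keys []string
        for _, w := range workloads {
            for _, term := range w.RequiredAntiAffinityTerms {
                if term.TopologyKey == labelKey {
                    keys = append(keys, w.QualifiedName)
                    break
                }
            }
        }
        return keys
    }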

Background

https://jsw.ibm.com/browse/TRB-44268

Testing

Pre-setup:

  1. Label the worker nodes with kubernetes.io/zone=zone1 or kubernetes.io/zone=zone2 in a K8s cluster

    [root@api.ocp410kev.cp.fyre.ibm.com ~]# k get nodes --show-labels 
    NAME                                STATUS                     ROLES    AGE    VERSION            LABELS
    master0.ocp410kev.cp.fyre.ibm.com   Ready                      master   249d   v1.23.17+16bcd69   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=master0.ocp410kev.cp.fyre.ibm.com,kubernetes.io/os=linux,node-role.kubernetes.io/master=,node.openshift.io/os_id=rhcos
    master1.ocp410kev.cp.fyre.ibm.com   Ready                      master   249d   v1.23.17+16bcd69   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=master1.ocp410kev.cp.fyre.ibm.com,kubernetes.io/os=linux,node-role.kubernetes.io/master=,node.openshift.io/os_id=rhcos
    master2.ocp410kev.cp.fyre.ibm.com   Ready                      master   249d   v1.23.17+16bcd69   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=master2.ocp410kev.cp.fyre.ibm.com,kubernetes.io/os=linux,node-role.kubernetes.io/master=,node.openshift.io/os_id=rhcos
    worker0.ocp410kev.cp.fyre.ibm.com   Ready                      worker   249d   v1.23.17+16bcd69   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=worker0.ocp410kev.cp.fyre.ibm.com,kubernetes.io/os=linux,kubernetes.io/zone=zone1,node-role.kubernetes.io/worker=,node.openshift.io/os_id=rhcos,scctest=yes
    worker1.ocp410kev.cp.fyre.ibm.com   Ready                      worker   249d   v1.23.17+16bcd69   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=worker1.ocp410kev.cp.fyre.ibm.com,kubernetes.io/os=linux,kubernetes.io/zone=zone1,node-role.kubernetes.io/worker=,node.openshift.io/os_id=rhcos,xxx=yyy
    worker2.ocp410kev.cp.fyre.ibm.com   Ready,SchedulingDisabled   worker   249d   v1.23.12+8a6bfe4   beta.kubernetes.io/arch=amd64,beta.kubernetes.io/os=linux,kubernetes.io/arch=amd64,kubernetes.io/hostname=worker2.ocp410kev.cp.fyre.ibm.com,kubernetes.io/os=linux,kubernetes.io/zone=zone2,node-role.kubernetes.io/worker=,node.openshift.io/os_id=rhcos
  2. Create a deployment with a pod anti-affinity term and kubernetes.io/zone as the topologyKey (kubectl commands for both steps are sketched after the manifest)

    apiVersion: apps/v1
    kind: Deployment
    metadata:
      name: pod-anti-affinity
      namespace: default
    spec:
      progressDeadlineSeconds: 600
      replicas: 1
      revisionHistoryLimit: 10
      selector:
        matchLabels:
          app: pod-anti-affinity
          database: db2
      strategy:
        rollingUpdate:
          maxSurge: 25%
          maxUnavailable: 25%
        type: RollingUpdate
      template:
        metadata:
          creationTimestamp: null
          labels:
            app: pod-anti-affinity
            database: db2
          name: pod-anti-affinity
        spec:
          affinity:
            podAntiAffinity:
              requiredDuringSchedulingIgnoredDuringExecution:
              - labelSelector:
                  matchExpressions:
                  - key: database
                    operator: In
                    values:
                    - db2                          # <--- self anti-affinity
                topologyKey: kubernetes.io/zone    # <--- HERE
          containers:
          - env:
            - name: RUN_TYPE
              value: cpu
            - name: CPU_PERCENT
              value: "1"
            image: beekman9527/cpumemload:latest
            imagePullPolicy: Always
            name: pod-anti-affinity-1
            resources:
              limits:
                cpu: 100m
                memory: 256Mi
              requests:
                cpu: 1m
                memory: 1Mi
            terminationMessagePath: /dev/termination-log
            terminationMessagePolicy: File
          dnsPolicy: ClusterFirst
          restartPolicy: Always
          schedulerName: default-scheduler
          securityContext: {}
          terminationGracePeriodSeconds: 30
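
For reference, the pre-setup can be reproduced with plain kubectl; the node names come from the cluster output in step 1, and the manifest filename is an assumption:

    # Label the workers (node names are from the cluster above).
    kubectl label node worker0.ocp410kev.cp.fyre.ibm.com kubernetes.io/zone=zone1
    kubectl label node worker1.ocp410kev.cp.fyre.ibm.com kubernetes.io/zone=zone1
    kubectl label node worker2.ocp410kev.cp.fyre.ibm.com kubernetes.io/zone=zone2

    # Apply the deployment (assumed to be saved as pod-anti-affinity.yaml).
    kubectl apply -f pod-anti-affinity.yaml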

Expected to see

Trigger a rediscovery and broadcast, then dump the topology from the Topology Processor (TP).

Search for the keyword zone1: there should be a Node_Group entity whose name contains zone1. Check its sold-commodity list; there should be a segmentation commodity whose key is the name of the workload controller.
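
A quick way to check this from the dump (a sketch, assuming the dump is a single JSON file saved as topology.json):

    # Find node_group entities (entityType 77) whose name contains "zone1"
    # and list their sold segmentation commodities (type 34).
    jq '.. | objects
        | select(.entityType == 77 and (.displayName | tostring | contains("zone1")))
        | {displayName, segmentation: [.commoditySoldList[]? | select(.commodityType.type == 34)]}' topology.json

The matching node_group entry from the dump: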

"entity": {
              "entityType": 77,  <------Node_Group entity
              "typeSpecificInfo": {},
              "oid": "75100735552417",
              "displayName": "kubernetes.io/zone\u003dzone1@d6f4db82-5e6e-4111-8630-803389e62212",<--- Name contains zone1
              "environmentType": "HYBRID",
              "commoditySoldList": [
                {
                  "commodityType": {
                    "type": 34,  <----Segmentation commodity 
                    "key": "Deployment/default/pod-anti-affinity" <----- Key is the name of the workload controller
                  },
                  "capacity": 1.0, <--------Capacity is 1 for pod spread
                  "isResizeable": false,
                  "isThin": true,
                  "active": true,
                  "displayName": "",
                  "aggregates": []
                }
              ],
              "commoditiesBoughtFromProviders": [],
              "entityState": "POWERED_ON",
              "entityPropertyMap": {},
              "analysisSettings": {
                "isAvailableAsProvider": true,
                "shopTogether": false,
                "controllable": true,
                "providerMustClone": false,
                "deletable": true
              },
              "origin": {
                "discoveryOrigin": {
                  "discoveringTargetIds": [],
                  "lastUpdatedTime": "1691779315511",
                  "discoveredTargetData": {
                    "75088732845744": {
                      "vendorId": "kubernetes.io/zone\u003dzone1@d6f4db82-5e6e-4111-8630-803389e62212",
                      "origin": "DISCOVERED"
                    }
                  }
                }
              },
              "connectedEntityList": [],
              "unplacedReason": [],
              "details": [],
              "stale": false
            }
          },
...

Search for the other node_group entity with the keyword zone2; the output should be similar to the previous one:

{
            "entity": {
              "entityType": 77, <---------- Node_Group entity
              "typeSpecificInfo": {},
              "oid": "75100735552418",
              "displayName": "kubernetes.io/zone\u003dzone2@d6f4db82-5e6e-4111-8630-803389e62212",<----Name contains `zone2`
              "environmentType": "HYBRID",
              "commoditySoldList": [
                {
                  "commodityType": {
                    "type": 34,  <------Segmentation commodity
                    "key": "Deployment/default/pod-anti-affinity"   <------The key is the name of the workload controller
                  },
                  "capacity": 1.0, <--------Capacity is 1 for pod spread
                  "isResizeable": false,
                  "isThin": true,
                  "active": true,
                  "displayName": "",
                  "aggregates": []
                }
              ],
              "commoditiesBoughtFromProviders": [],
              "entityState": "POWERED_ON",
              "entityPropertyMap": {},
              "analysisSettings": {
                "isAvailableAsProvider": true,
                "shopTogether": false,
                "controllable": true,
                "providerMustClone": false,
                "deletable": true
              },
              "origin": {
                "discoveryOrigin": {
                  "discoveringTargetIds": [],
                  "lastUpdatedTime": "1691779315511",
                  "discoveredTargetData": {
                    "75088732845744": {
                      "vendorId": "kubernetes.io/zone\u003dzone2@d6f4db82-5e6e-4111-8630-803389e62212",
                      "origin": "DISCOVERED"
                    }
                  }
                }
              },
              "connectedEntityList": [],
              "unplacedReason": [],
              "details": [],
              "stale": false
            }
          },

Checklist

These are the items that must be done by the developer and by the reviewers before the change is ready to merge. Please strike out any items that are not applicable, but don't delete them.

Audience

(@ mention any review/... groups or people that should be aware of this merge request)