redhat-manufacturing / edge-lab-gitops

Bend OpenShift to your will with ArgoCD in the Houston edge lab
3 stars 5 forks source link

Argo components #44

Closed strangiato closed 1 year ago

strangiato commented 1 year ago

This PR updates the configuration for Argo CD to utilize Kustomize components, making Argo CD configuration more modular.

This PR includes a few intentional changes in how the final argo instance is rendered:

  1. Update to annotation-based tracking instead of the default label-based tracking. This should help avoid some possible sync problems with operators that utilize the same labels or overzealously apply labels to child objects (looking at you, OpenShift Serverless).
  2. Switching from the old resourceCustomizations field for custom health checks to the newer resourceHealthChecks. There shouldn't be much functional change here. resourceHealthChecks provides a better list-based approach that makes it easier to manage and append health checks with kustomize.
  3. Adding additional custom health checks including InstallPlan, Build, and ImageStream.
  4. Enabling notifications in argo (not in use today but it is there in case anyone wants to explore using it)
strangiato commented 1 year ago

For reference here is the original rendered manifest for argo in main:

# Binds the cluster-admin ClusterRole to the Argo CD application controller
# ServiceAccount in the openshift-gitops namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: argocd-application-controller-cluster-admin
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: openshift-gitops-argocd-application-controller
  namespace: openshift-gitops
---
# ArgoCD instance "openshift-gitops" as rendered from main (before this PR).
apiVersion: argoproj.io/v1alpha1
kind: ArgoCD
metadata:
  name: openshift-gitops
  namespace: openshift-gitops
spec:
  # CPU/memory requests and limits for the ApplicationSet component.
  applicationSet:
    resources:
      limits:
        cpu: "2"
        memory: 1Gi
      requests:
        cpu: 250m
        memory: 512Mi
  # Application controller sizing; processors and sharding are left empty.
  controller:
    processors: {}
    resources:
      limits:
        cpu: "2"
        memory: 2Gi
      requests:
        cpu: 250m
        memory: 1Gi
    sharding: {}
  # Grafana is disabled, including its ingress and route.
  grafana:
    enabled: false
    ingress:
      enabled: false
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 250m
        memory: 128Mi
    route:
      enabled: false
  # HA mode is disabled; resources are still declared.
  ha:
    enabled: false
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 250m
        memory: 128Mi
  initialSSHKnownHosts: {}
  # RBAC: each listed group is mapped to role:admin; the scopes value
  # names the "groups" claim.
  rbac:
    policy: |
      g, gitops-admins, role:admin
      g, lab-cluster-admins, role:admin
      g, system:cluster-admins, role:admin
      g, cluster-admins, role:admin
    scopes: '[groups]'
  # Redis sizing.
  redis:
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 250m
        memory: 128Mi
  # Repo server sizing.
  repo:
    resources:
      limits:
        cpu: "1"
        memory: 2Gi
      requests:
        cpu: 250m
        memory: 256Mi
  # Legacy single-string form of the custom health checks. The string is itself
  # a YAML mapping keyed by "<group>/<Kind>", each entry carrying a health.lua
  # script. Two checks are embedded: operators.coreos.com/Subscription and
  # odf.openshift.io/StorageSystem. The value is emitted as a folded,
  # escaped double-quoted scalar, so it must be edited with care: "\n" marks the
  # real line breaks and a leading "\ " preserves indentation.
  resourceCustomizations: "operators.coreos.com/Subscription:\n  health.lua: |\n    health_status
    = {}\n    if obj.status ~= nil then\n      if obj.status.conditions ~= nil then\n
    \       numDegraded = 0\n        numPending = 0\n        msg = \"\"\n        for
    i, condition in pairs(obj.status.conditions) do\n          msg = msg .. i .. \":
    \" .. condition.type .. \" | \" .. condition.status .. \"\\n\"\n          if condition.type
    == \"InstallPlanPending\" and condition.status == \"True\" then\n            numPending
    = numPending + 1\n          elseif (condition.type == \"CatalogSourcesUnhealthy\"
    or condition.type == \"InstallPlanMissing\" or condition.type == \"InstallPlanFailed\"
    or condition.type == \"ResolutionFailed\") and condition.status == \"True\" then\n
    \           numDegraded = numDegraded + 1\n          end\n        end\n        if
    numDegraded == 0 and numPending == 0 then\n          health_status.status = \"Healthy\"\n
    \         health_status.message = msg\n          return health_status\n        elseif
    numPending > 0 and numDegraded == 0 then\n          health_status.status = \"Progressing\"\n
    \         health_status.message = \"An install plan for a subscription is pending
    installation\"\n          return health_status\n        else\n          health_status.status
    = \"Degraded\"\n          health_status.message = msg\n          return health_status\n
    \       end\n      end\n    end\n    health_status.status = \"Progressing\"\n
    \   health_status.message = \"An install plan for a subscription is pending installation\"\n
    \   return health_status\nodf.openshift.io/StorageSystem:\n  health.lua: |\n    health_status
    = {}\n    if obj.status ~= nil then\n      if obj.status.conditions ~= nil then\n
    \       msg = \"\"\n        for i, condition in pairs(obj.status.conditions) do\n
    \         msg = msg .. i .. \": \" .. condition.type .. \" | \" .. condition.status
    .. \"\\n\"\n\n          if condition.type == \"Available\" and condition.status
    == \"True\" then\n            health_status.status = \"Healthy\"\n          elseif
    condition.type == \"Progressing\" and condition.status == \"True\" then\n            health_status.status
    = \"Progressing\"\n          elseif (condition.type == \"StorageSystemInvalid\"
    and condition.status == \"True\") or (condition.type == \"VendorCsvReady\" and
    condition.status == \"False\") or (condition.type == \"VendorSystemPresent\" and
    condition.status == \"False\") then\n            health_status.status = \"Degraded\"\n
    \         end\n          \n        end\n\n        health_status.message = msg\n
    \       return health_status\n      end\n    end\n    health_status.status = \"Progressing\"\n
    \   health_status.message = \"The StorageSystem is progressing\"\n    return health_status\n"
  # Resource exclusions: Tekton TaskRun and PipelineRun objects on all
  # clusters ('*').
  resourceExclusions: |
    - apiGroups:
      - tekton.dev
      clusters:
      - '*'
      kinds:
      - TaskRun
      - PipelineRun
  # Argo CD server: autoscaling and ingresses are off; the server is exposed
  # through a route using reencrypt TLS termination.
  server:
    autoscale:
      enabled: false
    grpc:
      ingress:
        enabled: false
    ingress:
      enabled: false
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 125m
        memory: 128Mi
    route:
      enabled: true
      tls:
        termination: reencrypt
    service:
      type: ""
  # Single sign-on is provided by Dex; no Dex settings are given here.
  sso:
    provider: dex
  tls:
    ca: {}
---
# OpenShift user group "gitops-admins"; the ArgoCD rbac policy above maps this
# group name to role:admin.
# NOTE(review): Group is normally a cluster-scoped resource in OpenShift -
# confirm whether metadata.namespace is intentional here (it may simply be
# injected by a kustomize namespace transformer).
apiVersion: user.openshift.io/v1
kind: Group
metadata:
  name: gitops-admins
  namespace: openshift-gitops
users:
- codekow
- strangiato
strangiato commented 1 year ago

And here is the updated manifest:

# Binds the cluster-admin ClusterRole to the Argo CD application controller
# ServiceAccount in the openshift-gitops namespace. Only metadata.name differs
# from the binding rendered from main
# (argocd-application-controller-cluster-admin).
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: openshift-gitops-application-controller-cluster-admin
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: cluster-admin
subjects:
- kind: ServiceAccount
  name: openshift-gitops-argocd-application-controller
  namespace: openshift-gitops
---
# ArgoCD instance "openshift-gitops" as rendered by this PR.
apiVersion: argoproj.io/v1alpha1
kind: ArgoCD
metadata:
  name: openshift-gitops
  namespace: openshift-gitops
spec:
  # ApplicationSet sizing; the webhook server's ingress and route are both
  # disabled.
  applicationSet:
    resources:
      limits:
        cpu: "2"
        memory: 1Gi
      requests:
        cpu: 250m
        memory: 512Mi
    webhookServer:
      ingress:
        enabled: false
      route:
        enabled: false
  # Application controller sizing.
  controller:
    resources:
      limits:
        cpu: "2"
        memory: 2Gi
      requests:
        cpu: 250m
        memory: 1Gi
  # Grafana is disabled, including its ingress and route.
  grafana:
    enabled: false
    ingress:
      enabled: false
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 250m
        memory: 128Mi
    route:
      enabled: false
  # HA mode is disabled; resources are still declared.
  ha:
    enabled: false
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 250m
        memory: 128Mi
  monitoring:
    enabled: false
  # Notifications are switched on (item 4 of the PR description: enabled but
  # not in use today).
  notifications:
    enabled: true
  # Prometheus is disabled, including its ingress and route.
  prometheus:
    enabled: false
    ingress:
      enabled: false
    route:
      enabled: false
  # RBAC: each listed group is mapped to role:admin; the scopes value
  # names the "groups" claim.
  rbac:
    policy: |
      g, gitops-admins, role:admin
      g, lab-cluster-admins, role:admin
      g, system:cluster-admins, role:admin
      g, cluster-admins, role:admin
    scopes: '[groups]'
  # Redis sizing.
  redis:
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 250m
        memory: 128Mi
  # Repo server sizing.
  repo:
    resources:
      limits:
        cpu: "1"
        memory: 2Gi
      requests:
        cpu: 250m
        memory: 256Mi
  # Resource exclusions: Tekton TaskRun and PipelineRun objects on all
  # clusters ('*').
  resourceExclusions: |
    - apiGroups:
      - tekton.dev
      clusters:
      - '*'
      kinds:
      - TaskRun
      - PipelineRun
  resourceHealthChecks:
  # StorageSystem (odf.openshift.io): health is derived from status.conditions.
  # Severity precedence is Degraded > Progressing > Healthy, so the result does
  # not depend on the order in which conditions are listed. If status or
  # conditions are missing, or no condition matches, the check reports
  # Progressing instead of returning a table without a status.
  - check: |
      local health_status = {}
      -- Default result: used when status/conditions are absent or when no
      -- condition below matches.
      health_status.status = "Progressing"
      health_status.message = "The StorageSystem is progressing"
      if obj.status ~= nil and obj.status.conditions ~= nil then
        local msg = ""
        local available = false
        local progressing = false
        local degraded = false
        for i, condition in pairs(obj.status.conditions) do
          -- Collect every condition as "<index>: <type> | <status>".
          msg = msg .. i .. ": " .. condition.type .. " | " .. condition.status .. "\n"

          if condition.type == "Available" and condition.status == "True" then
            available = true
          elseif condition.type == "Progressing" and condition.status == "True" then
            progressing = true
          elseif (condition.type == "StorageSystemInvalid" and condition.status == "True")
            or (condition.type == "VendorCsvReady" and condition.status == "False")
            or (condition.type == "VendorSystemPresent" and condition.status == "False") then
            degraded = true
          end
        end

        -- Worst state wins, regardless of condition order.
        if degraded then
          health_status.status = "Degraded"
        elseif progressing then
          health_status.status = "Progressing"
        elseif available then
          health_status.status = "Healthy"
        end
        health_status.message = msg
      end
      return health_status
    group: odf.openshift.io
    kind: StorageSystem
  # InstallPlan (operators.coreos.com): Healthy once status.phase is
  # "Complete", Degraded when the phase is "Failed", Progressing otherwise
  # (including when status or phase is not set yet).
  - check: |
      local hs = {}
      if obj.status ~= nil and obj.status.phase ~= nil then
        if obj.status.phase == "Complete" then
          hs.status = "Healthy"
          hs.message = obj.status.phase
          return hs
        end
        if obj.status.phase == "Failed" then
          -- Surface a failed plan instead of reporting it as Progressing
          -- indefinitely; use the status message when one is present.
          hs.status = "Degraded"
          hs.message = obj.status.message or "InstallPlan failed"
          return hs
        end
      end
      hs.status = "Progressing"
      hs.message = "Waiting for InstallPlan to complete"
      return hs
    group: operators.coreos.com
    kind: InstallPlan
  # Subscription (operators.coreos.com): health is computed from
  # status.conditions by counting "pending" and "degraded" conditions.
  #   any degraded condition        -> Degraded (message lists all conditions)
  #   pending only                  -> Progressing
  #   neither                       -> Healthy (message lists all conditions)
  #   no status / no conditions yet -> Progressing
  - check: |
      local result = {}
      local pendingMessage = "An install plan for a subscription is pending installation"

      -- Nothing to inspect yet: treat as still installing.
      if obj.status == nil or obj.status.conditions == nil then
        result.status = "Progressing"
        result.message = pendingMessage
        return result
      end

      local degradedCount = 0
      local pendingCount = 0
      local summary = ""
      for idx, cond in pairs(obj.status.conditions) do
        summary = summary .. idx .. ": " .. cond.type .. " | " .. cond.status .. "\n"
        local isTrue = cond.status == "True"
        if cond.type == "InstallPlanPending" and isTrue then
          pendingCount = pendingCount + 1
        elseif cond.type == "InstallPlanMissing" and cond.reason ~= "ReferencedInstallPlanNotFound" then
          -- Counted whatever the condition status is, unless the reason is
          -- ReferencedInstallPlanNotFound.
          degradedCount = degradedCount + 1
        elseif isTrue and (cond.type == "CatalogSourcesUnhealthy" or cond.type == "InstallPlanFailed" or cond.type == "ResolutionFailed") then
          degradedCount = degradedCount + 1
        end
      end

      if degradedCount > 0 then
        result.status = "Degraded"
        result.message = summary
      elseif pendingCount > 0 then
        result.status = "Progressing"
        result.message = pendingMessage
      else
        result.status = "Healthy"
        result.message = summary
      end
      return result
    group: operators.coreos.com
    kind: Subscription
  # Build (build.openshift.io): Healthy once status.phase is "Complete";
  # Degraded for the unsuccessful end phases "Failed", "Error" and
  # "Cancelled"; Progressing otherwise (including when status or phase is
  # not set yet).
  - check: |
      local hs = {}
      if obj.status ~= nil and obj.status.phase ~= nil then
        local phase = obj.status.phase
        if phase == "Complete" then
          hs.status = "Healthy"
          hs.message = phase
          return hs
        end
        if phase == "Failed" or phase == "Error" or phase == "Cancelled" then
          -- These builds will not complete on their own; report them rather
          -- than showing "Progressing" indefinitely.
          hs.status = "Degraded"
          hs.message = "Build " .. phase
          if obj.status.message ~= nil and obj.status.message ~= "" then
            hs.message = hs.message .. ": " .. obj.status.message
          end
          return hs
        end
      end
      hs.status = "Progressing"
      hs.message = "Waiting for Build to complete"
      return hs
    group: build.openshift.io
    kind: Build
  # ImageStream (image.openshift.io): Healthy when status.tags has at least one
  # entry and every entry has at least one resolved image under "items";
  # Progressing otherwise. An entry with no items (for example one that only
  # carries conditions) keeps the stream Progressing.
  - check: |
      local hs = {}
      hs.status = "Progressing"
      hs.message = ""
      if obj.status ~= nil and obj.status.tags ~= nil then
        local numTags = 0
        for _, tag in pairs(obj.status.tags) do
          numTags = numTags + 1
          -- Resolved images for a status tag are listed under `items`
          -- (not `tags`).
          local numItems = 0
          if tag.items ~= nil then
            for _ in pairs(tag.items) do
              numItems = numItems + 1
            end
          end
          if numItems == 0 then
            -- At least one tag has nothing resolved yet.
            return hs
          end
        end
        if numTags > 0 then
          hs.status = "Healthy"
          hs.message = "ImageStream has tags resolved"
        end
      end
      return hs
    group: image.openshift.io
    kind: ImageStream
  # Track managed resources by annotation rather than the default label
  # (item 1 of the PR description).
  resourceTrackingMethod: annotation
  # Argo CD server: autoscaling and ingresses are off; the server is exposed
  # through a route using reencrypt TLS termination, with the insecure edge
  # policy set to Redirect.
  server:
    autoscale:
      enabled: false
    grpc:
      ingress:
        enabled: false
    ingress:
      enabled: false
    resources:
      limits:
        cpu: 500m
        memory: 256Mi
      requests:
        cpu: 125m
        memory: 128Mi
    route:
      enabled: true
      tls:
        insecureEdgeTerminationPolicy: Redirect
        termination: reencrypt
    service:
      type: ""
  # Single sign-on is provided by Dex with OpenShift OAuth enabled; Dex sizing
  # is declared explicitly.
  sso:
    dex:
      openShiftOAuth: true
      resources:
        limits:
          cpu: 500m
          memory: 256Mi
        requests:
          cpu: 250m
          memory: 128Mi
    provider: dex
---
# OpenShift user group "gitops-admins"; the ArgoCD rbac policy above maps this
# group name to role:admin.
# NOTE(review): Group is normally a cluster-scoped resource in OpenShift -
# confirm whether metadata.namespace is intentional here (it may simply be
# injected by a kustomize namespace transformer).
apiVersion: user.openshift.io/v1
kind: Group
metadata:
  name: gitops-admins
  namespace: openshift-gitops
users:
- codekow
- strangiato