wandb / helm-charts

Our official helm charts for deploying wandb into k8s
MIT License
19 stars 8 forks source link

fix: Add support for more security context #240

Closed flamarion closed 1 week ago

flamarion commented 1 month ago

This has been tested with the following configuration for the weave and wandb app along with Redis as part of a research to deploy W&B in Openshift.

[...]
app:
  install: true
  pod:
    securityContext:
      runAsUser: 1000710000
      runAsGroup: 0
      runAsNonRoot: true
      fsGroup: 1000710000
      fsGroupChangePolicy: Always
      seccompProfile:
        type: RuntimeDefault

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
[...]
weave:
  install: true
  pod:
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: Always
      seccompProfile:
        type: RuntimeDefault

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
[...]

redis:
  install: true
  master:
    # OpenShift specific configuration
    containerSecurityContext:
      enabled: true
      runAsUser: 1000710000
    # OpenShift specific configuration
    podSecurityContext:
      enabled: true
      fsGroup: 1000710000
[...]
flamarion commented 1 month ago

Otel and Yace deployments do not apply the entire sec context config. The capabilities part is not working, and I couldn't figure out what the issue could be. Besides this, all other pods I tested came up without any problem, even those with the partial sec context. I'm attaching the template file used to deploy and the command used to test was this.

helm upgrade --install wandb ./charts/operator-wandb -f ../../work/openshift/wandb-ocp4.yaml

This is the content of wandb-ocp4.yaml:

# apiVersion: apps.wandb.com/v1
# kind: WeightsAndBiases
# metadata:
#   labels:
#     app.kubernetes.io/instance: wandb
#     app.kubernetes.io/name: weightsandbiases
#     wandb.ai/console-default: "true"
#   name: wandb
#   namespace: wandb
# spec:
#   chart:
#     url: https://charts.wandb.ai
#     name: operator-wandb
#     version: 0.17.8

#   values:
global:
  # host: http://wandb-ocp.home.lab
  host: http://wandb.home.lab
  license: xxxxxxxxxx

  bucket:
    name: pves3.home.lab
    path: wandb
    provider: s3
    region: default
    accessKey: 3IAXODZ870OCD6TCFIAD
    secretKey: wsnNA1Vq2RHbKdrXSTaw0a09h79QQBZk0AXRUMNY
    kmsKey: null

  mysql:
    database: wandb
    host: mysql.home.lab
    password: password
    port: 3306
    user: wandb

  clickhouse:
    host: clickhouse.home.lab
    port: 8123
    password: wOUwktsnXa
    user: default
    database: wandb

  # podSecurityContext:
  #   runAsUser: 1000710000
  #   runAsGroup: 0
  #   runAsNonRoot: true
  #   fsGroup: 1000710000
  #   fsGroupChangePolicy: Always
  #   seccompProfile:
  #     type: RuntimeDefault

  # containerSecurityContext:
  #   securityContext:
  #         capabilities:
  #           drop:
  #             - ALL
  #         readOnlyRootFilesystem: false
  #         allowPrivilegeEscalation: false

mysql:
  install: false

app:
  install: true
  extraEnv:
    GORILLA_ALLOW_ANONYMOUS_PUBLIC_PROJECTS: true
    GORILLA_DATA_RETENTION_PERIOD: "36h"
    SERVER_FLAG_NAMED_WORKSPACES_AVAILABLE: true
    ENABLE_REGISTRY_UI: true
    SUPPORTED_FILE_STORES: "s3://3IAXODZ870OCD6TCFIAD:wsnNA1Vq2RHbKdrXSTaw0a09h79QQBZk0AXRUMNY@pves3.home.lab/team-a"

  pod:
    securityContext:
      runAsUser: 1000710000
      runAsGroup: 0
      runAsNonRoot: true
      fsGroup: 1000710000
      fsGroupChangePolicy: Always
      seccompProfile:
        type: RuntimeDefault

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

ingress:
  install: true
  class: nginx

console:
  install: true
  pod:
    securityContext:
      runAsUser: 1000710000
      runAsGroup: 0
      runAsNonRoot: true
      fsGroup: 1000710000
      fsGroupChangePolicy: Always
      seccompProfile:
        type: RuntimeDefault

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

parquet:
  install: true
  pod:
    securityContext:
      runAsUser: 1000710000
      runAsGroup: 0
      runAsNonRoot: true
      fsGroup: 1000710000
      fsGroupChangePolicy: Always
      seccompProfile:
        type: RuntimeDefault

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

weave:
  install: true
  pod:
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: Always
      seccompProfile:
        type: RuntimeDefault

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

  persistence:
    accessMode: ReadWriteOnce

prometheus:
  install: true
  instance:
    install: true
  redis-exporter:
    install: true
  mysql-exporter:
    install: true

otel:
  install: true
  pod:
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: "Always"
      seccompProfile:
        type: "RuntimeDefault"

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

redis:
  install: true
  master:
    # OpenShift specific configuration
    containerSecurityContext:
      enabled: true
      runAsUser: 1000710000
      privileged: false
    # OpenShift specific configuration
    podSecurityContext:
      enabled: true
      fsGroup: 1000710000

weave-trace:
  install: true
  pod:
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: "Always"
      seccompProfile:
        type: "RuntimeDefault"

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

flat-run-fields-updater:
  install: true
  pod:
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: "Always"
      seccompProfile:
        type: "RuntimeDefault"

  container:
    securityContext:
      capabilities:
        drop:
          - ALL
      readOnlyRootFilesystem: false
      allowPrivilegeEscalation: false
      privileged: false

yace:
  install: true
  # regions: ["ap-south-1"]
  pod:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "5000"
      prometheus.io/path: "/metrics"
      prometheus.io/scheme: http
    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: "Always"
      seccompProfile:
        type: "RuntimeDefault"

    container:
      securityContext:
        capabilities:
          drop:
            - ALL
        readOnlyRootFilesystem: false
        allowPrivilegeEscalation: false
        privileged: false

stackdriver:
  install: true
  pod:
    annotations:
      prometheus.io/scrape: "true"
      prometheus.io/port: "9255"
      prometheus.io/path: "/metrics"
      prometheus.io/scheme: http

    securityContext:
      runAsNonRoot: true
      runAsUser: 1000710000
      runAsGroup: 0
      fsGroup: 1000710000
      fsGroupChangePolicy: "Always"
      seccompProfile:
        type: "RuntimeDefault"

    container:
      securityContext:
        capabilities:
          drop:
            - ALL
        readOnlyRootFilesystem: false
        allowPrivilegeEscalation: false
        privileged: false
flamarion commented 1 month ago

Actually the problem now is only with Otel application that is not picking the security context config. The problem with yace was in the indentation in my spec file used to deploy the helm.

amanpruthi commented 1 month ago

Actually the problem now is only with Otel application that is not picking the security context config. The problem with yace was in the indentation in my spec file used to deploy the helm.

Try with below format:

otel:
  install: true
  daemonset:
    pod:
      securityContext:
        runAsNonRoot: true
        runAsUser: 1000710000
        runAsGroup: 0
        fsGroup: 1000710000
        fsGroupChangePolicy: "Always"
        seccompProfile:
          type: "RuntimeDefault"
    container:
      securityContext:
        capabilities:
          drop:
            - ALL
        readOnlyRootFilesystem: false
        allowPrivilegeEscalation: false
        privileged: false