acryldata / datahub-helm

Repository of helm charts for deploying DataHub on a Kubernetes cluster

Prerequisites stuck in Pending state #467

Open aauer-powens opened 1 month ago

aauer-powens commented 1 month ago

Hello, I just deployed the DataHub prerequisites chart, adding some tolerations and affinity to match my cluster requirements, but the pods stay in a Pending state forever:

NAME                           READY   STATUS    RESTARTS   AGE
elasticsearch-master-0         0/1     Pending   0          98s
prerequisites-kafka-broker-0   0/1     Pending   0          98s
prerequisites-mysql-0          0/1     Pending   0          98s
prerequisites-zookeeper-0      0/1     Pending   0          98s

When I describe each of them, there are no events to dig into. My affinity and tolerations are not confidential; a rough checklist of further checks and my full values.yaml follow below.
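
A rough sketch for digging further when describe shows no events; the datahub namespace here is an assumption, and the kubernetes/node-group=dev label and dedicated=dev:NoSchedule taint are taken from the values below:

# scheduling failures sometimes only show up as namespace/cluster events
kubectl get events -A --sort-by=.lastTimestamp | grep -iE 'schedul|volume|taint'
# all four pods claim persistent volumes; an unbound PVC leaves a pod Pending
kubectl get pvc -n datahub
kubectl get storageclass
# check that at least one node actually carries the label the required nodeAffinity asks for
kubectl get nodes -l kubernetes/node-group=dev
# ...and that its taint matches the toleration (dedicated=dev:NoSchedule)
kubectl describe nodes | grep -A 2 Taints

And the values.yaml itself: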

# Copy this file and update to the configuration of choice
elasticsearch:
  # Set this to false if you want to provide your own ES instance.
  enabled: true

  # If you're running in production, set this to 3 and comment out antiAffinity below
  # Or alternatively if you're running production, bring your own ElasticSearch
  replicas: 1
  minimumMasterNodes: 1
  # Set replicas to 1 and uncomment this to allow the instance to be scheduled on
  # a master node when deploying on a single node Minikube / Kind / etc cluster.
  antiAffinity: "soft"

  # # If you are running a multi-replica cluster, comment this out
  clusterHealthCheckParams: "wait_for_status=yellow&timeout=1s"

  # # Shrink default JVM heap.
  esJavaOpts: "-Xmx512m -Xms512m"

  # # Allocate smaller chunks of memory per pod.
  resources:
    requests:
      cpu: "100m"
      memory: "1024M"
    limits:
      cpu: "1000m"
      memory: "1024M"

  tolerations: 
    - key: "dedicated"
      operator: "Equal"
      value: "dev"
      effect: "NoSchedule"
  # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
  nodeAffinity:
    requiredDuringSchedulingIgnoredDuringExecution:
      nodeSelectorTerms:
        - matchExpressions:
            - key: kubernetes/node-group
              operator: In
              values:
                - dev
  # # Request smaller persistent volumes.
  # volumeClaimTemplate:
  #   accessModes: ["ReadWriteOnce"]
  #   storageClassName: "standard"
  #   resources:
  #     requests:
  #       storage: 100M

# Official neo4j chart, supports both community and enterprise editions
# see https://neo4j.com/docs/operations-manual/current/kubernetes/ for more information
# source: https://github.com/neo4j/helm-charts
neo4j:
  enabled: false
  nameOverride: neo4j
  neo4j:
    name: neo4j
    edition: "community"
    acceptLicenseAgreement: "yes"
    defaultDatabase: "graph.db"
    password: "datahub"
    # For better security, add password to neo4j-secrets k8s secret with neo4j-username, neo4j-password and NEO4J_AUTH and uncomment below
    # NEO4J_AUTH: should be composed like so: {Username}/{Password}
    # passwordFromSecret: neo4j-secrets
    podSpec:
      tolerations: 
        - key: "dedicated"
          operator: "Equal"
          value: "dev"
          effect: "NoSchedule"
      # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
      affinity: 
        nodeAffinity:
          requiredDuringSchedulingIgnoredDuringExecution:
            nodeSelectorTerms:
              - matchExpressions:
                  - key: kubernetes/node-group
                    operator: In
                    values:
                      - dev
    tolerations: 
      - key: "dedicated"
        operator: "Equal"
        value: "dev"
        effect: "NoSchedule"
    # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
    affinity: 
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: kubernetes/node-group
                  operator: In
                  values:
                    - dev
  tolerations: 
    - key: "dedicated"
      operator: "Equal"
      value: "dev"
      effect: "NoSchedule"
  # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
  affinity: 
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: kubernetes/node-group
                operator: In
                values:
                  - dev
  # Set security context for pod
  securityContext:
    runAsNonRoot: true
    runAsUser: 7474
    runAsGroup: 7474
    fsGroup: 7474
    fsGroupChangePolicy: "Always"

  # Disallow privilegeEscalation on container level
  containerSecurityContext:
    allowPrivilegeEscalation: false

  # Create a volume for neo4j, SSD storage is recommended
  volumes: {}
      # mode: "dynamic"
      # dynamic:
      #   storageClassName: managed-csi-premium

  env:
    NEO4J_PLUGINS: '["apoc"]'

mysql:
  enabled: true
  auth:
    # For better security, add mysql-secrets k8s secret with mysql-root-password, mysql-replication-password and mysql-password
    existingSecret: mysql-secrets
  primary:
    extraFlags: "--character-set-server=utf8mb4 --collation-server=utf8mb4_bin"
    tolerations: 
      - key: "dedicated"
        operator: "Equal"
        value: "dev"
        effect: "NoSchedule"
    # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
    affinity: 
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: kubernetes/node-group
                  operator: In
                  values:
                    - dev

postgresql:
  enabled: false
  auth:
    # For better security, add postgresql-secrets k8s secret with postgres-password, replication-password and password
    existingSecret: postgresql-secrets
  tolerations: 
    - key: "dedicated"
      operator: "Equal"
      value: "dev"
      effect: "NoSchedule"
  # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
  affinity: 
    nodeAffinity:
      requiredDuringSchedulingIgnoredDuringExecution:
        nodeSelectorTerms:
          - matchExpressions:
              - key: kubernetes/node-group
                operator: In
                values:
                  - dev
# Using gcloud-proxy requires the node in a GKE cluster to have Cloud SQL Admin scope,
# you will need to create a new node and migrate the workload if your current node does not have this scope
gcloud-sqlproxy:
  enabled: false
  # Specify an existing secret holding the cloud-sql service account credentials; if not specified,
  # the default compute engine service account will be used and it needs to have the Cloud SQL Client role
  existingSecret: ""
  # The key in the existing secret that stores the credentials
  existingSecretKey: ""
  # SQL connection settings
  cloudsql:
    # MySQL instances:
    # update with your GCP project, the region of your Cloud SQL instance and the id of your Cloud SQL instance
    # use port 3306 for MySQL, or the port you set for your SQL instance.
    instances:
      # GCP Cloud SQL instance id
      - instance: ""
        # GCP project where the instance exists.
        project: ""
        # GCP region where the instance exists.
        region: ""
        # Port number for the proxy to expose for this instance.
        port: 3306

cp-helm-charts:
  enabled: false
  # Schema registry is under the community license
  cp-schema-registry:
    enabled: false
    kafka:
      # <<release-name>>-kafka:9092
      bootstrapServers: "prerequisites-kafka:9092"
  cp-kafka:
    enabled: false
  cp-zookeeper:
    enabled: false
  cp-kafka-rest:
    enabled: false
  cp-kafka-connect:
    enabled: false
  cp-ksql-server:
    enabled: false
  cp-control-center:
    enabled: false

# Bitnami version of Kafka that deploys open source Kafka https://artifacthub.io/packages/helm/bitnami/kafka
kafka:
  enabled: true
  listeners:
    client:
      protocol: PLAINTEXT
    interbroker:
      protocol: PLAINTEXT
  controller:
    replicaCount: 0
  broker:
    replicaCount: 1
    # The new minId for the broker is 100. If we don't override this, the broker will get id 100
    # and will not be able to load the existing partitions, so we set minId to 0 to stay backwards compatible
    minId: 0
    # These server properties are no longer exposed as parameters in the bitnami kafka chart since 24.0.0
    # They need to be passed in through extraConfig. See below for reference
    # https://github.com/bitnami/charts/tree/main/bitnami/kafka#to-2400
    extraConfig: |
      message.max.bytes=5242880
      default.replication.factor=1
      offsets.topic.replication.factor=1
      transaction.state.log.replication.factor=1
    tolerations: 
      - key: "dedicated"
        operator: "Equal"
        value: "dev"
        effect: "NoSchedule"
    # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
    affinity: 
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: kubernetes/node-group
                  operator: In
                  values:
                    - dev
  kraft:
    enabled: false
  zookeeper:
    enabled: true
    tolerations: 
      - key: "dedicated"
        operator: "Equal"
        value: "dev"
        effect: "NoSchedule"
    # affinity -- affinity for scheduler pod assignment, look `kubectl explain pod.spec.affinity` for details
    affinity: 
      nodeAffinity:
        requiredDuringSchedulingIgnoredDuringExecution:
          nodeSelectorTerms:
            - matchExpressions:
                - key: kubernetes/node-group
                  operator: In
                  values:
                    - dev
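
For completeness, this is roughly how the rendered scheduling fields can be checked against what is deployed. The prerequisites release name matches the pod names above; the datahub Helm repo alias and the datahub namespace are assumptions about my local setup:

# render the prerequisites chart with these values and inspect the scheduling fields
helm template prerequisites datahub/datahub-prerequisites -f values.yaml \
  | grep -E -A 12 'kind: StatefulSet|tolerations:|nodeAffinity:'
# compare against what the cluster actually runs
kubectl get statefulset elasticsearch-master -n datahub -o yaml | grep -A 12 'affinity:'
kubectl get statefulset prerequisites-mysql -n datahub -o yaml | grep -A 6 'tolerations:'
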
github-actions[bot] commented 6 days ago

This issue is stale because it has been open for 30 days with no activity. If you believe this is still an issue on the latest DataHub release please leave a comment with the version that you tested it with. If this is a question/discussion please head to https://slack.datahubproject.io. For feature requests please use https://feature-requests.datahubproject.io