datainfrahq / druid-operator

Apache Druid On Kubernetes
Other
101 stars 42 forks source link

Coordinator and router not created #168

Open AtoLrn opened 4 months ago

AtoLrn commented 4 months ago

Hi,

We are having issues getting the coordinator and router created. There is no mention of the coordinator or router in the Druid resource event log, nor in the operator's log.

Do you have any idea? I think it might be related to this issue (https://github.com/datainfrahq/druid-operator/issues/105), but since it has been closed, I am reopening it here.

Here is the whole cluster configuration:

# Druid custom resource managed by datainfrahq/druid-operator.
apiVersion: druid.apache.org/v1alpha1
kind: Druid
metadata:
  name: druid
  namespace: druid-operator
spec:
  image: apache/druid:29.0.0
  startScript: /druid.sh
  scalePvcSts: true
  rollingDeploy: true
  # Operator-injected default probes are disabled; probes are declared
  # per node (see the historical tiers below).
  defaultProbes: false
  podLabels:
    environment: stage
    release: alpha
  podAnnotations:
    dummy: k8s_extn_needs_atleast_one_annotation
  # Headless ClusterIP service applied to all node types.
  services:
    - spec:
        type: ClusterIP
        clusterIP: None
  commonConfigMountPath: "/opt/druid/conf/druid/cluster/_common"
  # JVM flags shared by every Druid process (per-node heap is set via
  # extra.jvm.options or DRUID_XMS/DRUID_XMX env vars).
  jvm.options: |-
    -server
    -XX:MaxDirectMemorySize=10240g
    -Duser.timezone=UTC
    -Dfile.encoding=UTF-8
    -Dlog4j.debug
    -Djava.util.logging.manager=org.apache.logging.log4j.jul.LogManager
    -Djava.io.tmpdir=/druid/data
  # Console-only log4j2 config at INFO level.
  log4j.config: |
    <?xml version="1.0" encoding="UTF-8" ?>
    <Configuration status="WARN">
        <Appenders>
            <Console name="Console" target="SYSTEM_OUT">
                <PatternLayout pattern="%d{ISO8601} %p [%t] %c - %m%n"/>
            </Console>
        </Appenders>
        <Loggers>
            <Root level="info">
                <AppenderRef ref="Console"/>
            </Root>
        </Loggers>
    </Configuration>
  # NOTE(review): "${ runtime_properties }" looks like an unsubstituted template
  # placeholder (Terraform/Helm style). Confirm it is rendered before this
  # manifest is applied — a literal placeholder would leave the common runtime
  # configuration broken for every node.
  common.runtime.properties: |
    ${ runtime_properties }
  env:
    - name: POD_NAME
      valueFrom:
        fieldRef:
          fieldPath: metadata.name
    - name: POD_NAMESPACE
      valueFrom:
        fieldRef:
          fieldPath: metadata.namespace

  nodes:
    # Broker tier: stateless query-serving node, hence kind: Deployment.
    brokers:
      kind: Deployment
      nodeType: "broker"
      druid.port: 8088
      services:
        - spec:
            type: ClusterIP
            clusterIP: None
      nodeConfigMountPath: "/opt/druid/conf/druid/cluster/query/broker"
      replicas: 1
      runtime.properties: |
        druid.service=druid/broker
        # HTTP server threads
        druid.broker.http.numConnections=5
        druid.server.http.numThreads=40
        # Processing threads and buffers
        druid.processing.buffer.sizeBytes=25000000
        druid.sql.enable=true
      # 512 MiB fixed heap for the broker JVM.
      extra.jvm.options: |-
        -Xmx512m
        -Xms512m


    # Coordinator tier: runs in coordinator-as-overlord mode
    # (druid.coordinator.asOverlord.enabled=true below), so no separate
    # overlord node spec is declared in this manifest.
    coordinators:
      kind: Deployment
      nodeType: "coordinator"
      druid.port: 8088
      services:
        - spec:
            type: ClusterIP
            clusterIP: None
      nodeConfigMountPath: "/opt/druid/conf/druid/cluster/master/coordinator-overlord"
      replicas: 1
      runtime.properties: |
        druid.service=druid/coordinator

        # HTTP server threads
        druid.coordinator.startDelay=PT30S
        druid.coordinator.period=PT30S

        # Configure this coordinator to also run as Overlord
        druid.coordinator.asOverlord.enabled=true
        druid.coordinator.asOverlord.overlordService=druid/overlord
      # 512 MiB fixed heap for the coordinator/overlord JVM.
      extra.jvm.options: |-
        -Xmx512M
        -Xms512M


    # "Hot" historical tier. No `kind` is set and it declares
    # volumeClaimTemplates, so the operator presumably renders it as a
    # StatefulSet — confirm against the operator's default.
    hot:
      nodeType: "historical"
      druid.port: 8088
      resources:
        requests:
          # FIX: was "1.5Mi" (1.5 mebibytes) — far below the -Xmx512m JVM heap
          # set in extra.jvm.options below, so the pod request was unusable.
          # "1.5Gi" matches the intended order of magnitude.
          memory: "1.5Gi"
          cpu: "1"
      services:
        - spec:
            type: ClusterIP
            clusterIP: None
      nodeConfigMountPath: "/opt/druid/conf/druid/cluster/data/historical"
      replicas: 1
      livenessProbe:
        failureThreshold: 10
        httpGet:
          path: /status/health
          port: 8088
        initialDelaySeconds: 5
        periodSeconds: 10
        successThreshold: 1
        timeoutSeconds: 5
      # Readiness tracks segment load status, not just process health.
      readinessProbe:
        failureThreshold: 20
        httpGet:
          path: /druid/historical/v1/loadstatus
          port: 8088
        initialDelaySeconds: 5
        periodSeconds: 10
        successThreshold: 1
        timeoutSeconds: 5
      # NOTE(review): `startUpProbe` (capital U) is the druid-operator CRD
      # field spelling, not core Kubernetes `startupProbe` — confirm against
      # the installed CRD schema.
      startUpProbe:
        failureThreshold: 20
        httpGet:
          path: /druid/historical/v1/loadstatus
          port: 8088
        initialDelaySeconds: 60
        periodSeconds: 30
        successThreshold: 1
        timeoutSeconds: 10
      volumeMounts:
        - mountPath: /druid/data/segments
          name: hot-volume
      # 5Gi PVC backing the segment cache declared in runtime.properties.
      volumeClaimTemplates:
        - metadata:
            name: hot-volume
          spec:
            accessModes:
              - ReadWriteOnce
            resources:
              requests:
                storage: 5Gi
            storageClassName: gp
      runtime.properties: |
        druid.service=druid/hot
        druid.server.tier=hot
        druid.server.priority=1
        druid.processing.buffer.sizeBytes=25000000
        druid.processing.numThreads=2
        # Segment storage
        druid.segmentCache.locations=[{"path":"/druid/data/segments","maxSize":1000000000}]
        druid.server.maxSize=1000000000
      # 512 MiB fixed heap for the historical JVM.
      extra.jvm.options: |-
        -Xmx512m
        -Xms512m


    # "Cold" historical tier. Heap and direct memory are sized via DRUID_*
    # env vars here instead of extra.jvm.options (as the other tiers do).
    cold:
      druid.port: 8088
      env:
      - name: DRUID_XMS
        value: 1000m
      - name: DRUID_XMX
        value: 1000m
      - name: DRUID_MAXDIRECTMEMORYSIZE
        value: 2g
      - name: POD_NAME
        valueFrom:
          fieldRef:
            fieldPath: metadata.name
      - name: POD_NAMESPACE
        valueFrom:
          fieldRef:
            fieldPath: metadata.namespace
      # NOTE(review): initialDelaySeconds 1800 = 30 minutes before the first
      # liveness check — confirm this long grace period is intentional.
      livenessProbe:
        failureThreshold: 3
        httpGet:
          path: /status/health
          port: 8088
        initialDelaySeconds: 1800
        periodSeconds: 5
      nodeConfigMountPath: /opt/druid/conf/druid/cluster/data/historical
      nodeType: historical
      podDisruptionBudgetSpec:
        maxUnavailable: 1
      readinessProbe:
        failureThreshold: 18
        httpGet:
          path: /druid/historical/v1/readiness
          port: 8088
        periodSeconds: 10
      replicas: 1
      # NOTE(review): quantities are unquoted plain scalars; `3.5Gi` parses as
      # a string (fine), but quoting k8s quantities is the safer convention.
      resources:
        limits:
          cpu: 4
          memory: 3.5Gi
        requests:
          cpu: 1
          memory: 2Gi
      runtime.properties: |
        druid.plaintextPort=8088
        druid.service=druid/historical/cold
        druid.segmentCache.locations=[{"path":"/druid/data/segments","maxSize":1000000000}]


    # Router tier: stateless, hence kind: Deployment.
    routers:
      nodeType: "router"
      druid.port: 8088
      kind: Deployment
      services:
        - spec:
            type: ClusterIP
            clusterIP: None
      nodeConfigMountPath: "/opt/druid/conf/druid/cluster/query/router"
      replicas: 1
      # NOTE(review): runtime.properties is empty — the router gets no
      # druid.service or management-proxy settings, unlike every other node
      # here. Confirm this is intentional; it may relate to the node not
      # being created (the reported issue).
      runtime.properties: ""