grafana / helm-charts

Errors loading rules Failed to load the data source configuration for Loki: Unable to fetch alert rules. Is the Loki data source properly configured? #2473

Open sangvi183 opened 1 year ago

sangvi183 commented 1 year ago

I am using the loki-distributed Helm chart. In Grafana, under Alerting, I get the following error: "Errors loading rules. Failed to load the data source configuration for [Loki]: Unable to fetch alert rules. Is the Loki data source properly configured?"

Need help resolving this issue.

dark-brains commented 10 months ago

It's working for me. I'm using the loki-distributed Helm chart. If you are using the gateway instead of an ingress, you must set the Loki data source URL to the gateway service URL. If you are using an ingress instead of the gateway, you must set the Loki data source URL to your ingress host.
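
For reference, a minimal data source provisioning sketch. It assumes the release is named loki-distributed and runs in the monitoring namespace; adjust both to your setup, and swap the URL for your ingress host if you use an ingress:

apiVersion: 1
datasources:
  - name: Loki
    type: loki
    access: proxy
    # Gateway case; for the ingress case use http(s)://<YOUR_INGRESS_HOST> instead.
    url: http://loki-distributed-gateway.monitoring.svc.cluster.local
    jsonData:
      # Lets Grafana load and manage the ruler's alert rules from this data source.
      manageAlerts: true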

dark-brains commented 10 months ago

Example Helm override values.yaml configuration for AWS EKS + ELB:

loki:
  server:
    http_listen_port: 3100
  config: |
    auth_enabled: false

    server:
      {{- toYaml .Values.loki.server | nindent 6 }}

    common:
      compactor_address: http://{{ include "loki.compactorFullname" . }}:3100

    distributor:
      ring:
        kvstore:
          store: memberlist

    memberlist:
      join_members:
        - {{ include "loki.fullname" . }}-memberlist

    ingester:
      lifecycler:
        ring:
          kvstore:
            store: memberlist
          replication_factor: 1
      chunk_idle_period: 30m
      chunk_block_size: 262144
      chunk_encoding: snappy
      chunk_retain_period: 1m
      max_transfer_retries: 0
      wal:
        dir: /var/loki/wal

    limits_config:
      enforce_metric_name: false
      reject_old_samples: true
      reject_old_samples_max_age: 168h
      max_cache_freshness_per_query: 2m
      split_queries_by_interval: 2m
      max_entries_limit_per_query: 5000000
      retention_period: 2160h
      max_query_length: 2200h

    {{- if .Values.loki.schemaConfig}}
    schema_config:
    {{- toYaml .Values.loki.schemaConfig | nindent 2}}
    {{- end}}
    {{- if .Values.loki.storageConfig}}
    storage_config:
    {{- if .Values.indexGateway.enabled}}
    {{- $indexGatewayClient := dict "server_address" (printf "dns:///%s:9095" (include "loki.indexGatewayFullname" .)) }}
    {{- $_ := set .Values.loki.storageConfig.boltdb_shipper "index_gateway_client" $indexGatewayClient }}
    {{- end}}
    {{- toYaml .Values.loki.storageConfig | nindent 2}}
    {{- if .Values.memcachedIndexQueries.enabled }}
      index_queries_cache_config:
        memcached_client:
          addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedIndexQueriesFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}
          consistent_hash: true
    {{- end}}
    {{- end}}

    runtime_config:
      file: /var/{{ include "loki.name" . }}-runtime/runtime.yaml

    chunk_store_config:
      max_look_back_period: 0s
      {{- if .Values.memcachedChunks.enabled }}
      chunk_cache_config:
        embedded_cache:
          enabled: false
        memcached_client:
          consistent_hash: true
          addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedChunksFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}
      {{- end }}
      {{- if .Values.memcachedIndexWrites.enabled }}
      write_dedupe_cache_config:
        memcached_client:
          consistent_hash: true
          addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedIndexWritesFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}
      {{- end }}

    table_manager:
      retention_deletes_enabled: false
      retention_period: 0s

    query_range:
      align_queries_with_step: true
      max_retries: 1
      cache_results: true
      results_cache:
        cache:
          {{- if .Values.memcachedFrontend.enabled }}
          memcached_client:
            addresses: dnssrv+_memcached-client._tcp.{{ include "loki.memcachedFrontendFullname" . }}.{{ .Release.Namespace }}.svc.{{ .Values.global.clusterDomain }}
            consistent_hash: true
          {{- else }}
          embedded_cache:
            enabled: true
            ttl: 24h
          {{- end }}

    frontend_worker:
      {{- if .Values.queryScheduler.enabled }}
      scheduler_address: {{ include "loki.querySchedulerFullname" . }}:9095
      {{- else }}
      frontend_address: {{ include "loki.queryFrontendFullname" . }}-headless:9095
      {{- end }}

    frontend:
      log_queries_longer_than: 5s
      compress_responses: true
      {{- if .Values.queryScheduler.enabled }}
      scheduler_address: {{ include "loki.querySchedulerFullname" . }}:9095
      {{- end }}
      tail_proxy_url: http://{{ include "loki.querierFullname" . }}:3100

    ruler:
      storage:
        type: local
        local:
          directory: /etc/loki/rules
      ring:
        kvstore:
          store: memberlist
      rule_path: /tmp/loki/scratch
      enable_api: true
      enable_alertmanager_v2: true      
      alertmanager_url: http://<YOUR_ALERTMANAGER_URL>

    compactor:
      working_directory: /var/loki/compactor
      shared_store: s3
      compaction_interval: 10m
      retention_enabled: true

    schema_config:
      configs:
        - from: "2023-03-29"
          store: boltdb-shipper
          object_store: s3
          schema: v11
          index:
            period: 24h
            prefix: loki_index_

    storage_config:
      filesystem: null
      aws:
        region: us-east-1
        bucketnames: BUCKET-NAME
        s3forcepathstyle: false

      boltdb_shipper:
          active_index_directory: /var/loki/boltdb-shipper-active
          shared_store: s3
          cache_location: /var/loki/boltdb-shipper-cache
          cache_ttl: 24h

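# The eks.amazonaws.com/role-arn annotation below is IRSA; the referenced IAM
# role must grant access to the S3 bucket configured in storage_config above.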
serviceAccount:
  create: true
  name: loki
  imagePullSecrets: []
  annotations:
    eks.amazonaws.com/role-arn: Role-ARN
  automountServiceAccountToken: true

ingester:
  kind: StatefulSet
  nodeSelector: &node_selector
    role: logging

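# With auth_enabled: false Loki runs single-tenant and the tenant ID is "fake",
# which is why the rules directory below is keyed "fake".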
ruler:
  enabled: true
  nodeSelector: *node_selector
  directories: 
    fake:
      rules.yml: |
        groups:
          - name: ProductionErrors
            interval: 1m
            rules:
              - alert: ProductionErrors
                expr: |
                  count_over_time({component!="", environment="prod"} |~ "[eE][rR][rR][oO][rR]" != "ERROR:root" | pattern `<message>`  [1m])
                for: 0m
                labels:
                  severity: critical
                  receiver: slack_logs
                annotations:
                  summary: 'Errors count {{ $value }}.'
                  title: 'Production {{ $labels.app }} errors'
                  cluster: 'prod-eks'
                  description: 'The application/{{ $labels.app }} threw errors.'
                  message: '{{ $labels.message }}'                              

distributor:
  nodeSelector: *node_selector

querier:
  nodeSelector: *node_selector

queryFrontend:
  nodeSelector: *node_selector

compactor:
  nodeSelector: *node_selector

indexGateway:
  nodeSelector: *node_selector

memcachedExporter:
  enabled: true

memcachedChunks:
  enabled: true
  nodeSelector: *node_selector

memcachedFrontend:
  enabled: true
  nodeSelector: *node_selector

memcachedIndexQueries:
  enabled: true
  nodeSelector: *node_selector

memcachedIndexWrites:
  enabled: true
  nodeSelector: *node_selector

gateway:
  enabled: false

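# Grafana loads Loki alert rules through the Prometheus-compatible ruler API,
# so the ruler paths below must be routed; if they are not, Grafana shows
# "Unable to fetch alert rules. Is the Loki data source properly configured?"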
ingress:
  enabled: true
  ingressClassName: alb
  annotations:
    alb.ingress.kubernetes.io/certificate-arn: "CERT-ARN"
    alb.ingress.kubernetes.io/group.name: "LB-Group_Name"
    alb.ingress.kubernetes.io/healthcheck-path: "/ready"
    alb.ingress.kubernetes.io/healthcheck-port: "traffic-port"
    alb.ingress.kubernetes.io/load-balancer-name: "LB-Name"
    alb.ingress.kubernetes.io/scheme: "internal"
    alb.ingress.kubernetes.io/target-type: "ip"
    external-dns.alpha.kubernetes.io/cloudflare-proxied: "false"
  paths:
    distributor:
      - /api/prom/push
      - /loki/api/v1/push
    querier:
      - /api/prom/tail
      - /loki/api/v1/tail
    query-frontend:
      - /loki/api
    ruler:
      - /api/prom/rules
      - /loki/api/v1/rules
      - /prometheus/api/v1/rules
      - /prometheus/api/v1/alerts
  hosts:
    - LOKI-HOST

In Grafana, configure the data source like this:

URL: http(s)://<YOUR_INGRESS_HOST_NAME>
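
If Grafana itself is deployed from this repo's grafana chart, the data source can be provisioned through its values instead. A minimal sketch, with loki.example.com standing in for <YOUR_INGRESS_HOST_NAME>:

datasources:
  datasources.yaml:
    apiVersion: 1
    datasources:
      - name: Loki
        type: loki
        access: proxy
        url: https://loki.example.com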

dark-brains commented 10 months ago

If you are using the gateway instead, disable the ingress, enable the gateway, and configure the Grafana data source like this:

URL: http://loki-distributed-gateway.<NAMESPACE>.svc.cluster.local

dark-brains commented 7 months ago

This configuration is still working for me.