Closed renovate[bot] closed 2 weeks ago
--- kubernetes/apps/observability/victoria-metrics/app Kustomization: flux-system/victoria-metrics HelmRelease: observability/victoria-metrics
+++ kubernetes/apps/observability/victoria-metrics/app Kustomization: flux-system/victoria-metrics HelmRelease: observability/victoria-metrics
@@ -13,13 +13,13 @@
spec:
chart: victoria-metrics-k8s-stack
sourceRef:
kind: HelmRepository
name: victoria-metrics
namespace: flux-system
- version: 0.25.3
+ version: 0.25.5
interval: 30m
values:
alertmanager:
enabled: false
defaultDashboardsEnabled: false
defaultRules:
--- HelmRelease: observability/victoria-metrics Deployment: observability/victoria-metrics-victoria-metrics-operator
+++ HelmRelease: observability/victoria-metrics Deployment: observability/victoria-metrics-victoria-metrics-operator
@@ -20,13 +20,13 @@
app.kubernetes.io/name: victoria-metrics-operator
app.kubernetes.io/instance: victoria-metrics
spec:
serviceAccountName: victoria-metrics-victoria-metrics-operator
containers:
- name: operator
- image: victoriametrics/operator:v0.47.1
+ image: victoriametrics/operator:v0.47.2
imagePullPolicy: IfNotPresent
env:
- name: WATCH_NAMESPACE
value: ''
- name: POD_NAME
valueFrom:
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-alertmanager.rules
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-alertmanager.rules
@@ -35,14 +35,14 @@
summary: A member of an Alertmanager cluster has not found all other cluster
members.
expr: |-
# Without max_over_time, failed scrapes could create false negatives, see
# https://www.robustperception.io/alerting-on-gauges-in-prometheus-2-0 for details.
max_over_time(alertmanager_cluster_members{job="vmalertmanager-victoria-metrics",namespace="observability"}[5m])
- < on (namespace,service) group_left
- count by (namespace,service) (max_over_time(alertmanager_cluster_members{job="vmalertmanager-victoria-metrics",namespace="observability"}[5m]))
+ < on (namespace,service,cluster) group_left
+ count by (namespace,service,cluster) (max_over_time(alertmanager_cluster_members{job="vmalertmanager-victoria-metrics",namespace="observability"}[5m]))
for: 15m
labels:
severity: critical
- alert: AlertmanagerFailedToSendAlerts
annotations:
description: Alertmanager {{ $labels.namespace }}/{{ $labels.pod}} failed
@@ -66,13 +66,13 @@
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
humanizePercentage }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications
to a critical integration.
expr: |-
- min by (namespace,service, integration) (
+ min by (namespace,service,integration,cluster) (
rate(alertmanager_notifications_failed_total{job="vmalertmanager-victoria-metrics",namespace="observability", integration=~`.*`}[5m])
/
ignoring (reason) group_left rate(alertmanager_notifications_total{job="vmalertmanager-victoria-metrics",namespace="observability", integration=~`.*`}[5m])
)
> 0.01
for: 5m
@@ -84,13 +84,13 @@
}} sent from any instance in the {{$labels.job}} cluster is {{ $value |
humanizePercentage }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterfailedtosendalerts
summary: All Alertmanager instances in a cluster failed to send notifications
to a non-critical integration.
expr: |-
- min by (namespace,service, integration) (
+ min by (namespace,service,integration,cluster) (
rate(alertmanager_notifications_failed_total{job="vmalertmanager-victoria-metrics",namespace="observability", integration!~`.*`}[5m])
/
ignoring (reason) group_left rate(alertmanager_notifications_total{job="vmalertmanager-victoria-metrics",namespace="observability", integration!~`.*`}[5m])
)
> 0.01
for: 5m
@@ -100,14 +100,14 @@
annotations:
description: Alertmanager instances within the {{$labels.job}} cluster have
different configurations.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerconfiginconsistent
summary: Alertmanager instances within the same cluster have different configurations.
expr: |-
- count by (namespace,service) (
- count_values by (namespace,service) ("config_hash", alertmanager_config_hash{job="vmalertmanager-victoria-metrics",namespace="observability"})
+ count by (namespace,service,cluster) (
+ count_values by (namespace,service,cluster) ("config_hash", alertmanager_config_hash{job="vmalertmanager-victoria-metrics",namespace="observability"})
)
!= 1
for: 20m
labels:
severity: critical
- alert: AlertmanagerClusterDown
@@ -117,17 +117,17 @@
last 5m.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclusterdown
summary: Half or more of the Alertmanager instances within the same cluster
are down.
expr: |-
(
- count by (namespace,service) (
+ count by (namespace,service,cluster) (
avg_over_time(up{job="vmalertmanager-victoria-metrics",namespace="observability"}[5m]) < 0.5
)
/
- count by (namespace,service) (
+ count by (namespace,service,cluster) (
up{job="vmalertmanager-victoria-metrics",namespace="observability"}
)
)
>= 0.5
for: 5m
labels:
@@ -139,17 +139,17 @@
last 10m.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/alertmanager/alertmanagerclustercrashlooping
summary: Half or more of the Alertmanager instances within the same cluster
are crashlooping.
expr: |-
(
- count by (namespace,service) (
+ count by (namespace,service,cluster) (
changes(process_start_time_seconds{job="vmalertmanager-victoria-metrics",namespace="observability"}[10m]) > 4
)
/
- count by (namespace,service) (
+ count by (namespace,service,cluster) (
up{job="vmalertmanager-victoria-metrics",namespace="observability"}
)
)
>= 0.5
for: 5m
labels:
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-general.rules
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-general.rules
@@ -17,14 +17,14 @@
- alert: TargetDown
annotations:
description: '{{ printf "%.4g" $value }}% of the {{ $labels.job }}/{{ $labels.service
}} targets in {{ $labels.namespace }} namespace are down.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/targetdown
summary: One or more targets are unreachable.
- expr: 100 * (count(up == 0) BY (cluster, job, namespace, service) / count(up)
- BY (cluster, job, namespace, service)) > 10
+ expr: 100 * (count(up == 0) BY (job,namespace,service,cluster) / count(up) BY
+ (job,namespace,service,cluster)) > 10
for: 10m
labels:
severity: warning
- alert: Watchdog
annotations:
description: |
@@ -47,11 +47,12 @@
other alerts.
This alert fires whenever there's a severity="info" alert, and stops firing when another alert with a
severity of 'warning' or 'critical' starts firing on the same namespace.
This alert should be routed to a null receiver and configured to inhibit alerts with severity="info".
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/general/infoinhibitor
summary: Info-level alert inhibition.
- expr: ALERTS{severity = "info"} == 1 unless on(namespace) ALERTS{alertname !=
- "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} == 1
+ expr: ALERTS{severity = "info"} == 1 unless on (namespace,cluster) ALERTS{alertname
+ != "InfoInhibitor", severity =~ "warning|critical", alertstate="firing"} ==
+ 1
labels:
severity: none
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containercpuusagesecondstotal
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containercpuusagesecondstotal
@@ -13,14 +13,14 @@
groups:
- name: k8s.rules.container_cpu_usage_seconds_total
params: {}
rules:
- annotations: {}
expr: |-
- sum by (cluster, namespace, pod, container) (
+ sum by (namespace,pod,container,cluster) (
irate(container_cpu_usage_seconds_total{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}[5m])
- ) * on (cluster, namespace, pod) group_left(node) topk by (cluster, namespace, pod) (
- 1, max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
+ ) * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (
+ 1, max by (namespace,pod,node,cluster) (kube_pod_info{node!=""})
)
labels: {}
record: node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemorycache
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemorycache
@@ -14,12 +14,12 @@
- name: k8s.rules.container_memory_cache
params: {}
rules:
- annotations: {}
expr: |-
container_memory_cache{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
- max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
+ * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1,
+ max by (namespace,pod,node,cluster) (kube_pod_info{node!=""})
)
labels: {}
record: node_namespace_pod_container:container_memory_cache
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemoryrss
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemoryrss
@@ -14,12 +14,12 @@
- name: k8s.rules.container_memory_rss
params: {}
rules:
- annotations: {}
expr: |-
container_memory_rss{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
- max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
+ * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1,
+ max by (namespace,pod,node,cluster) (kube_pod_info{node!=""})
)
labels: {}
record: node_namespace_pod_container:container_memory_rss
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemoryswap
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemoryswap
@@ -14,12 +14,12 @@
- name: k8s.rules.container_memory_swap
params: {}
rules:
- annotations: {}
expr: |-
container_memory_swap{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
- max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
+ * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1,
+ max by (namespace,pod,node,cluster) (kube_pod_info{node!=""})
)
labels: {}
record: node_namespace_pod_container:container_memory_swap
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemoryworkingsetbytes
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containermemoryworkingsetbytes
@@ -14,12 +14,12 @@
- name: k8s.rules.container_memory_working_set_bytes
params: {}
rules:
- annotations: {}
expr: |-
container_memory_working_set_bytes{job="kubelet", metrics_path="/metrics/cadvisor", image!=""}
- * on (cluster, namespace, pod) group_left(node) topk by(cluster, namespace, pod) (1,
- max by(cluster, namespace, pod, node) (kube_pod_info{node!=""})
+ * on (namespace,pod,cluster) group_left(node) topk by (namespace,pod,cluster) (1,
+ max by (namespace,pod,node,cluster) (kube_pod_info{node!=""})
)
labels: {}
record: node_namespace_pod_container:container_memory_working_set_bytes
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containerresource
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.containerresource
@@ -13,88 +13,88 @@
groups:
- name: k8s.rules.container_resource
params: {}
rules:
- annotations: {}
expr: |-
- kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
- group_left() max by (namespace, pod, cluster) (
+ kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"} * on (namespace,pod,cluster)
+ group_left() max by (namespace,pod,cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
labels: {}
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_requests
- annotations: {}
expr: |-
- sum by (namespace, cluster) (
- sum by (namespace, pod, cluster) (
- max by (namespace, pod, container, cluster) (
+ sum by (namespace,cluster) (
+ sum by (namespace,pod,cluster) (
+ max by (namespace,pod,container,cluster) (
kube_pod_container_resource_requests{resource="memory",job="kube-state-metrics"}
- ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
+ ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
labels: {}
record: namespace_memory:kube_pod_container_resource_requests:sum
- annotations: {}
expr: |-
- kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
- group_left() max by (namespace, pod, cluster) (
+ kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,cluster)
+ group_left() max by (namespace,pod,cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
labels: {}
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests
- annotations: {}
expr: |-
- sum by (namespace, cluster) (
- sum by (namespace, pod, cluster) (
- max by (namespace, pod, container, cluster) (
+ sum by (namespace,cluster) (
+ sum by (namespace,pod,cluster) (
+ max by (namespace,pod,container,cluster) (
kube_pod_container_resource_requests{resource="cpu",job="kube-state-metrics"}
- ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
+ ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
labels: {}
record: namespace_cpu:kube_pod_container_resource_requests:sum
- annotations: {}
expr: |-
- kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace, pod, cluster)
- group_left() max by (namespace, pod, cluster) (
+ kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"} * on (namespace,pod,cluster)
+ group_left() max by (namespace,pod,cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
labels: {}
record: cluster:namespace:pod_memory:active:kube_pod_container_resource_limits
- annotations: {}
expr: |-
- sum by (namespace, cluster) (
- sum by (namespace, pod, cluster) (
- max by (namespace, pod, container, cluster) (
+ sum by (namespace,cluster) (
+ sum by (namespace,pod,cluster) (
+ max by (namespace,pod,container,cluster) (
kube_pod_container_resource_limits{resource="memory",job="kube-state-metrics"}
- ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
+ ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
labels: {}
record: namespace_memory:kube_pod_container_resource_limits:sum
- annotations: {}
expr: |-
- kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace, pod, cluster)
- group_left() max by (namespace, pod, cluster) (
+ kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"} * on (namespace,pod,cluster)
+ group_left() max by (namespace,pod,cluster) (
(kube_pod_status_phase{phase=~"Pending|Running"} == 1)
)
labels: {}
record: cluster:namespace:pod_cpu:active:kube_pod_container_resource_limits
- annotations: {}
expr: |-
- sum by (namespace, cluster) (
- sum by (namespace, pod, cluster) (
- max by (namespace, pod, container, cluster) (
+ sum by (namespace,cluster) (
+ sum by (namespace,pod,cluster) (
+ max by (namespace,pod,container,cluster) (
kube_pod_container_resource_limits{resource="cpu",job="kube-state-metrics"}
- ) * on(namespace, pod, cluster) group_left() max by (namespace, pod, cluster) (
+ ) * on (namespace,pod,cluster) group_left() max by (namespace,pod,cluster) (
kube_pod_status_phase{phase=~"Pending|Running"} == 1
)
)
)
labels: {}
record: namespace_cpu:kube_pod_container_resource_limits:sum
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.podowner
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-k8s.rules.podowner
@@ -13,53 +13,53 @@
groups:
- name: k8s.rules.pod_owner
params: {}
rules:
- annotations: {}
expr: |-
- max by (cluster, namespace, workload, pod) (
+ max by (namespace,workload,pod,cluster) (
label_replace(
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="ReplicaSet"},
"replicaset", "$1", "owner_name", "(.*)"
- ) * on(replicaset, namespace) group_left(owner_name) topk by(replicaset, namespace) (
- 1, max by (replicaset, namespace, owner_name) (
+ ) * on (replicaset,namespace,cluster) group_left(owner_name) topk by (replicaset,namespace,cluster) (
+ 1, max by (replicaset,namespace,owner_name,cluster) (
kube_replicaset_owner{job="kube-state-metrics"}
)
),
"workload", "$1", "owner_name", "(.*)"
)
)
labels:
workload_type: deployment
record: namespace_workload_pod:kube_pod_owner:relabel
- annotations: {}
expr: |-
- max by (cluster, namespace, workload, pod) (
+ max by (namespace,workload,pod,cluster) (
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="DaemonSet"},
"workload", "$1", "owner_name", "(.*)"
)
)
labels:
workload_type: daemonset
record: namespace_workload_pod:kube_pod_owner:relabel
- annotations: {}
expr: |-
- max by (cluster, namespace, workload, pod) (
+ max by (namespace,workload,pod,cluster) (
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="StatefulSet"},
"workload", "$1", "owner_name", "(.*)"
)
)
labels:
workload_type: statefulset
record: namespace_workload_pod:kube_pod_owner:relabel
- annotations: {}
expr: |-
- max by (cluster, namespace, workload, pod) (
+ max by (namespace,workload,pod,cluster) (
label_replace(
kube_pod_owner{job="kube-state-metrics", owner_kind="Job"},
"workload", "$1", "owner_name", "(.*)"
)
)
labels:
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kube-state-metrics
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kube-state-metrics
@@ -62,12 +62,12 @@
are not being exposed.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kube-state-metrics/kubestatemetricsshardsmissing
summary: kube-state-metrics shards are missing.
expr: |-
2^max(kube_state_metrics_total_shards{job="kube-state-metrics"}) by (cluster) - 1
-
- sum( 2 ^ max by (cluster, shard_ordinal) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster)
+ sum( 2 ^ max by (shard_ordinal,cluster) (kube_state_metrics_shard_ordinal{job="kube-state-metrics"}) ) by (cluster)
!= 0
for: 15m
labels:
severity: critical
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubelet.rules
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubelet.rules
@@ -13,26 +13,26 @@
groups:
- name: kubelet.rules
params: {}
rules:
- annotations: {}
expr: histogram_quantile(0.99, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet",
- metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on(cluster, instance)
+ metrics_path="/metrics"}[5m])) by (instance,le,cluster) * on (instance,cluster)
group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
labels:
quantile: '0.99'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- annotations: {}
expr: histogram_quantile(0.9, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet",
- metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on(cluster, instance)
+ metrics_path="/metrics"}[5m])) by (instance,le,cluster) * on (instance,cluster)
group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
labels:
quantile: '0.9'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
- annotations: {}
expr: histogram_quantile(0.5, sum(rate(kubelet_pleg_relist_duration_seconds_bucket{job="kubelet",
- metrics_path="/metrics"}[5m])) by (cluster, instance, le) * on(cluster, instance)
+ metrics_path="/metrics"}[5m])) by (instance,le,cluster) * on (instance,cluster)
group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"})
labels:
quantile: '0.5'
record: node_quantile:kubelet_pleg_relist_duration_seconds:histogram_quantile
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-apps
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-apps
@@ -29,17 +29,17 @@
annotations:
description: Pod {{ $labels.namespace }}/{{ $labels.pod }} has been in a non-ready
state for longer than 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubepodnotready
summary: Pod has been in a non-ready state for more than 15 minutes.
expr: |-
- sum by (namespace, pod, cluster) (
- max by(namespace, pod, cluster) (
+ sum by (namespace,pod,cluster) (
+ max by (namespace,pod,cluster) (
kube_pod_status_phase{job="kube-state-metrics", namespace=~".*", phase=~"Pending|Unknown|Failed"}
- ) * on(namespace, pod, cluster) group_left(owner_kind) topk by(namespace, pod, cluster) (
- 1, max by(namespace, pod, owner_kind, cluster) (kube_pod_owner{owner_kind!="Job"})
+ ) * on (namespace,pod,cluster) group_left(owner_kind) topk by (namespace,pod,cluster) (
+ 1, max by (namespace,pod,owner_kind,cluster) (kube_pod_owner{owner_kind!="Job"})
)
) > 0
for: 15m
labels:
severity: warning
- alert: KubeDeploymentGenerationMismatch
@@ -125,13 +125,13 @@
description: StatefulSet {{ $labels.namespace }}/{{ $labels.statefulset }}
update has not been rolled out.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubestatefulsetupdatenotrolledout
summary: StatefulSet update has not been rolled out.
expr: |-
(
- max by(namespace, statefulset) (
+ max by (namespace,statefulset,cluster) (
kube_statefulset_status_current_revision{job="kube-state-metrics", namespace=~".*"}
unless
kube_statefulset_status_update_revision{job="kube-state-metrics", namespace=~".*"}
)
*
(
@@ -184,13 +184,13 @@
annotations:
description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on
container {{ $labels.container}} has been in waiting state for longer than
1 hour.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
- expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
+ expr: sum by (namespace,pod,container,cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
namespace=~".*"}) > 0
for: 1h
labels:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
@@ -220,13 +220,13 @@
annotations:
description: Job {{ $labels.namespace }}/{{ $labels.job_name }} is taking
more than {{ "43200" | humanizeDuration }} to complete.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubejobnotcompleted
summary: Job did not complete in time
expr: |-
- time() - max by(namespace, job_name, cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"}
+ time() - max by (namespace,job_name,cluster) (kube_job_status_start_time{job="kube-state-metrics", namespace=~".*"}
and
kube_job_status_active{job="kube-state-metrics", namespace=~".*"} > 0) > 43200
labels:
severity: warning
- alert: KubeJobFailed
annotations:
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-resources
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-resources
@@ -116,14 +116,14 @@
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
$labels.pod }}.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: |-
- sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
+ sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (container,pod,namespace,cluster)
/
- sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
+ sum(increase(container_cpu_cfs_periods_total{}[5m])) by (container,pod,namespace,cluster)
> ( 25 / 100 )
for: 15m
labels:
severity: info
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-storage
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-storage
@@ -26,15 +26,15 @@
kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
/
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
) < 0.03
and
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
for: 1m
labels:
severity: critical
- alert: KubePersistentVolumeFillingUp
annotations:
@@ -51,15 +51,15 @@
kubelet_volume_stats_capacity_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
) < 0.15
and
kubelet_volume_stats_used_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
and
predict_linear(kubelet_volume_stats_available_bytes{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
for: 1h
labels:
severity: warning
- alert: KubePersistentVolumeInodesFillingUp
annotations:
@@ -73,15 +73,15 @@
kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}
/
kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
) < 0.03
and
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
for: 1m
labels:
severity: critical
- alert: KubePersistentVolumeInodesFillingUp
annotations:
@@ -99,15 +99,15 @@
kubelet_volume_stats_inodes{job="kubelet", namespace=~".*", metrics_path="/metrics"}
) < 0.15
and
kubelet_volume_stats_inodes_used{job="kubelet", namespace=~".*", metrics_path="/metrics"} > 0
and
predict_linear(kubelet_volume_stats_inodes_free{job="kubelet", namespace=~".*", metrics_path="/metrics"}[6h], 4 * 24 * 3600) < 0
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_access_mode{ access_mode="ReadOnlyMany"} == 1
- unless on(cluster, namespace, persistentvolumeclaim)
+ unless on (namespace,persistentvolumeclaim,cluster)
kube_persistentvolumeclaim_labels{label_excluded_from_alerts="true"} == 1
for: 1h
labels:
severity: warning
- alert: KubePersistentVolumeErrors
annotations:
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-system-apiserver
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-system-apiserver
@@ -18,47 +18,49 @@
annotations:
description: A client certificate used to authenticate to kubernetes apiserver
is expiring in less than 7.0 days.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"}
- > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m])))
+ > 0 and on (job,cluster) histogram_quantile(0.01, sum by (job,le,cluster)
+ (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m])))
< 604800
for: 5m
labels:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to kubernetes apiserver
is expiring in less than 24.0 hours.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"}
- > 0 and on(job) histogram_quantile(0.01, sum by (job, le) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m])))
+ > 0 and on (job,cluster) histogram_quantile(0.01, sum by (job,le,cluster)
+ (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m])))
< 86400
for: 5m
labels:
severity: critical
- alert: KubeAggregatedAPIErrors
annotations:
description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace
}} has reported errors. It has appeared unavailable {{ $value | humanize
}} times averaged over the past 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapierrors
summary: Kubernetes aggregated API has reported errors.
- expr: sum by(name, namespace, cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m]))
+ expr: sum by (name,namespace,cluster)(increase(aggregator_unavailable_apiservice_total{job="apiserver"}[10m]))
> 4
labels:
severity: warning
- alert: KubeAggregatedAPIDown
annotations:
description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace
}} has been only {{ $value | humanize }}% available over the last 10m.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeaggregatedapidown
summary: Kubernetes aggregated API is down.
- expr: (1 - max by(name, namespace, cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m])))
+ expr: (1 - max by (name,namespace,cluster)(avg_over_time(aggregator_unavailable_apiservice{job="apiserver"}[10m])))
* 100 < 85
for: 5m
labels:
severity: warning
- alert: KubeAPITerminatedRequests
annotations:
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-system-kubelet
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-system-kubelet
@@ -40,30 +40,30 @@
annotations:
description: Kubelet '{{ $labels.node }}' is running at {{ $value | humanizePercentage
}} of its Pod capacity.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubelettoomanypods
summary: Kubelet is running at capacity.
expr: |-
- count by(cluster, node) (
- (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on(instance,pod,namespace,cluster) group_left(node) topk by(instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"})
+ count by (node,cluster) (
+ (kube_pod_status_phase{job="kube-state-metrics",phase="Running"} == 1) * on (instance,pod,namespace,cluster) group_left(node) topk by (instance,pod,namespace,cluster) (1, kube_pod_info{job="kube-state-metrics"})
)
/
- max by(cluster, node) (
+ max by (node,cluster) (
kube_node_status_capacity{job="kube-state-metrics",resource="pods"} != 1
) > 0.95
for: 15m
labels:
severity: info
- alert: KubeNodeReadinessFlapping
annotations:
description: The readiness status of node {{ $labels.node }} has changed {{
$value }} times in the last 15 minutes.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubenodereadinessflapping
summary: Node readiness status is flapping.
expr: sum(changes(kube_node_status_condition{job="kube-state-metrics",status="true",condition="Ready"}[15m]))
- by (cluster, node) > 2
+ by (node,cluster) > 2
for: 15m
labels:
severity: warning
- alert: KubeletPlegDurationHigh
annotations:
description: The Kubelet Pod Lifecycle Event Generator has a 99th percentile
@@ -79,13 +79,13 @@
annotations:
description: Kubelet Pod startup 99th percentile latency is {{ $value }} seconds
on node {{ $labels.node }}.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeletpodstartuplatencyhigh
summary: Kubelet Pod startup latency is too high.
expr: histogram_quantile(0.99, sum(rate(kubelet_pod_worker_duration_seconds_bucket{job="kubelet",
- metrics_path="/metrics"}[5m])) by (cluster, instance, le)) * on(cluster, instance)
+ metrics_path="/metrics"}[5m])) by (instance,le,cluster)) * on (instance,cluster)
group_left(node) kubelet_node_name{job="kubelet", metrics_path="/metrics"}
> 60
for: 15m
labels:
severity: warning
- alert: KubeletClientCertificateExpiration
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-system
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-kubernetes-system
@@ -17,26 +17,26 @@
- alert: KubeVersionMismatch
annotations:
description: There are {{ $value }} different semantic versions of Kubernetes
components running.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeversionmismatch
summary: Different semantic versions of Kubernetes components running.
- expr: count by (cluster) (count by (git_version, cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*")))
+ expr: count by (cluster) (count by (git_version,cluster) (label_replace(kubernetes_build_info{job!~"kube-dns|coredns"},"git_version","$1","git_version","(v[0-9]*.[0-9]*).*")))
> 1
for: 15m
labels:
severity: warning
- alert: KubeClientErrors
annotations:
description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance
}}' is experiencing {{ $value | humanizePercentage }} errors.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclienterrors
summary: Kubernetes API server client is experiencing errors.
expr: |-
- (sum(rate(rest_client_requests_total{job="apiserver",code=~"5.."}[5m])) by (cluster, instance, job, namespace)
+ (sum(rate(rest_client_requests_total{job="apiserver",code=~"5.."}[5m])) by (instance,job,namespace,cluster)
/
- sum(rate(rest_client_requests_total{job="apiserver"}[5m])) by (cluster, instance, job, namespace))
+ sum(rate(rest_client_requests_total{job="apiserver"}[5m])) by (instance,job,namespace,cluster))
> 0.01
for: 15m
labels:
severity: warning
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-node.rules
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-node.rules
@@ -13,24 +13,24 @@
groups:
- name: node.rules
params: {}
rules:
- annotations: {}
expr: |-
- topk by(cluster, namespace, pod) (1,
- max by (cluster, node, namespace, pod) (
+ topk by (namespace,pod,cluster) (1,
+ max by (node,namespace,pod,cluster) (
label_replace(kube_pod_info{job="kube-state-metrics",node!=""}, "pod", "$1", "pod", "(.*)")
))
labels: {}
record: 'node_namespace_pod:kube_pod_info:'
- annotations: {}
expr: |-
- count by (cluster, node) (
+ count by (node,cluster) (
node_cpu_seconds_total{mode="idle",job="node-exporter"}
- * on (cluster, namespace, pod) group_left(node)
- topk by(cluster, namespace, pod) (1, node_namespace_pod:kube_pod_info:)
+ * on (namespace,pod,cluster) group_left(node)
+ topk by (namespace,pod,cluster) (1, node_namespace_pod:kube_pod_info:)
)
labels: {}
record: node:node_num_cpu:sum
- annotations: {}
expr: |-
sum(
@@ -43,13 +43,13 @@
)
) by (cluster)
labels: {}
record: :node_memory_MemAvailable_bytes:sum
- annotations: {}
expr: |-
- avg by (cluster, node) (
+ avg by (node,cluster) (
sum without (mode) (
rate(node_cpu_seconds_total{mode!="idle",mode!="iowait",mode!="steal",job="node-exporter"}[5m])
)
)
labels: {}
record: node:node_cpu_utilization:ratio_rate5m
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-vmcluster
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-vmcluster
@@ -43,16 +43,16 @@
description: |-
Disk utilisation on instance {{ $labels.instance }} is more than 80%.
Having less than 20% of free disk space could cripple merges processes and overall performance. Consider to limit the ingestion rate, decrease retention or scale the disk space if possible.
summary: Instance {{ $labels.instance }} (job={{ $labels.job }}) will run
out of disk space soon
expr: |-
- sum(vm_data_size_bytes) by(job, instance) /
+ sum(vm_data_size_bytes) by (job,instance,cluster) /
(
- sum(vm_free_disk_space_bytes) by(job, instance) +
- sum(vm_data_size_bytes) by(job, instance)
+ sum(vm_free_disk_space_bytes) by (job,instance,cluster) +
+ sum(vm_data_size_bytes) by (job,instance,cluster)
) > 0.8
for: 30m
labels:
severity: critical
- alert: RequestErrorsToAPI
annotations:
@@ -75,17 +75,17 @@
RPC errors are interconnection errors between cluster components.
Possible reasons for errors are misconfiguration, overload, network blips or unreachable components.
summary: Too many RPC errors for {{ $labels.job }} (instance {{ $labels.instance
}})
expr: |-
(
- sum(increase(vm_rpc_connection_errors_total[5m])) by(job, instance)
+ sum(increase(vm_rpc_connection_errors_total[5m])) by (job,instance,cluster)
+
- sum(increase(vm_rpc_dial_errors_total[5m])) by(job, instance)
+ sum(increase(vm_rpc_dial_errors_total[5m])) by (job,instance,cluster)
+
- sum(increase(vm_rpc_handshake_errors_total[5m])) by(job, instance)
+ sum(increase(vm_rpc_handshake_errors_total[5m])) by (job,instance,cluster)
) > 0
for: 15m
labels:
severity: warning
show_at: dashboard
- alert: RowsRejectedOnIngestion
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-vmoperator
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-vmoperator
@@ -62,13 +62,13 @@
expr: |-
sum(
workqueue_depth{
job=~".*((victoria.*)|vm)-?operator",
name=~"(vmagent|vmalert|vmalertmanager|vmauth|vmcluster|vmnodescrape|vmpodscrape|vmprobe|vmrule|vmservicescrape|vmsingle|vmstaticscrape)"
}
- ) by(name) > 10
+ ) by (name,cluster) > 10
for: 15m
labels:
severity: warning
show_at: dashboard
- alert: BadObjects
annotations:
@@ -77,12 +77,12 @@
description: Operator got incorrect resources in controller {{ $labels.controller
}}, check operator logs
summary: Incorrect `{{ $labels.controller }}` resources in the cluster
expr: |-
sum(
operator_controller_bad_objects_count{job=~".*((victoria.*)|vm)-?operator"}
- ) by(controller) > 0
+ ) by (controller,cluster) > 0
for: 15m
labels:
severity: warning
show_at: dashboard
--- HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-vmsingle
+++ HelmRelease: observability/victoria-metrics VMRule: observability/victoria-metrics-vmsingle
@@ -43,16 +43,16 @@
description: |-
Disk utilisation on instance {{ $labels.instance }} is more than 80%.
Having less than 20% of free disk space could cripple merge processes and overall performance. Consider to limit the ingestion rate, decrease retention or scale the disk space if possible.
summary: Instance {{ $labels.instance }} (job={{ $labels.job }}) will run
out of disk space soon
expr: |-
- sum(vm_data_size_bytes) by(job, instance) /
+ sum(vm_data_size_bytes) by (job,instance,cluster) /
(
- sum(vm_free_disk_space_bytes) by(job, instance) +
- sum(vm_data_size_bytes) by(job, instance)
+ sum(vm_free_disk_space_bytes) by (job,instance,cluster) +
+ sum(vm_data_size_bytes) by (job,instance,cluster)
) > 0.8
for: 30m
labels:
severity: critical
- alert: RequestErrorsToAPI
annotations:
@@ -86,15 +86,15 @@
This effect is known as Churn Rate.
High Churn Rate tightly connected with database performance and may result in unexpected OOM's or slow queries.
summary: Churn rate is more than 10% on "{{ $labels.instance }}" for the last
15m
expr: |-
(
- sum(rate(vm_new_timeseries_created_total[5m])) by(instance)
+ sum(rate(vm_new_timeseries_created_total[5m])) by (instance,cluster)
/
- sum(rate(vm_rows_inserted_total[5m])) by (instance)
+ sum(rate(vm_rows_inserted_total[5m])) by (instance,cluster)
) > 0.1
for: 15m
labels:
severity: warning
- alert: TooHighChurnRate24h
annotations:
@@ -104,15 +104,15 @@
The number of created new time series over last 24h is 3x times higher than current number of active series on "{{ $labels.instance }}".
This effect is known as Churn Rate.
High Churn Rate tightly connected with database performance and may result in unexpected OOM's or slow queries.
summary: Too high number of new series on "{{ $labels.instance }}" created
over last 24h
expr: |-
- sum(increase(vm_new_timeseries_created_total[24h])) by(instance)
+ sum(increase(vm_new_timeseries_created_total[24h])) by (instance,cluster)
>
- (sum(vm_cache_entries{type="storage/hour_metric_ids"}) by(instance) * 3)
+ (sum(vm_cache_entries{type="storage/hour_metric_ids"}) by (instance,cluster) * 3)
for: 15m
labels:
severity: warning
- alert: TooHighSlowInsertsRate
annotations:
dashboard: grafana.domain.com/d/wNf0q_kZk?viewPanel=68&var-instance={{ $labels.instance
@@ -122,15 +122,15 @@
is needed for optimal handling of the current number of active time series.
See also https://github.com/VictoriaMetrics/VictoriaMetrics/issues/3976#issuecomment-1476883183
summary: Percentage of slow inserts is more than 5% on "{{ $labels.instance
}}" for the last 15m
expr: |-
(
- sum(rate(vm_slow_row_inserts_total[5m])) by(instance)
+ sum(rate(vm_slow_row_inserts_total[5m])) by (instance,cluster)
/
- sum(rate(vm_rows_inserted_total[5m])) by (instance)
+ sum(rate(vm_rows_inserted_total[5m])) by (instance,cluster)
) > 0.05
for: 15m
labels:
severity: warning
- alert: LabelsLimitExceededOnIngestion
annotations:
--- HelmRelease: observability/victoria-metrics ValidatingWebhookConfiguration: observability/victoria-metrics-victoria-metrics-operator-admission
+++ HelmRelease: observability/victoria-metrics ValidatingWebhookConfiguration: observability/victoria-metrics-victoria-metrics-operator-admission
@@ -10,13 +10,13 @@
webhooks:
- clientConfig:
service:
namespace: observability
name: victoria-metrics-victoria-metrics-operator
path: /validate-operator-victoriametrics-com-v1beta1-vlogs
- caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURIakNDQWdhZ0F3SUJBZ0lSQVB3a0hwV0hoVXErYm1ydWlLRkZocmt3RFFZSktvWklodmNOQVFFTEJRQXcKR1RFWE1CVUdBMVVFQXhNT2RtMHRiM0JsY21GMGIzSXRZMkV3SGhjTk1qUXdPREkyTWpFd056STBXaGNOTXpRdwpPREkwTWpFd056STBXakFaTVJjd0ZRWURWUVFERXc1MmJTMXZjR1Z5WVhSdmNpMWpZVENDQVNJd0RRWUpLb1pJCmh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBS3dGMXV4Smx0ZzMxdmhBMTgrVkZ3bmdsb2xZaVRiOWd2WEcKSWdXa1dyaVlJNGpXaUJxK1p0WmhRQWhITGhDWjJRMjZmSkdENktLaGVVY2Z3SUtIQVNEd1BORlNuczF5N3kzUApRbTlOakVuOFI2VVNBaVVoUGlRWnVielFMK3IvSjI3YkNHWDRiSXdRZktab29DMGV6K3BJNXAybFZwNGtXVHE4ClhOMHhVeFB4OUNNci9BMnJtVnBDWlBXQXdyalUwL01qajFsWmU1WUJNMEdveU9FZXI0UmZ1T3EwNkFqK0k1QVUKaGJKR2FGNEJSVnpjSFVwWmZQeW5Qb3NacWlXOFNSa1duYjlNYzQ2a210QXQyV3JQTTZpU0o4bXRkVWtaN1E0QgpzMHUzd1JzSUg4NEQwOE10cGxiU3ZTc2FSRDJlSjdUSjdhc3c0TTU3eWpQOFpxK2paVGNDQXdFQUFhTmhNRjh3CkRnWURWUjBQQVFIL0JBUURBZ0trTUIwR0ExVWRKUVFXTUJRR0NDc0dBUVVGQndNQkJnZ3JCZ0VGQlFjREFqQVAKQmdOVkhSTUJBZjhFQlRBREFRSC9NQjBHQTFVZERnUVdCQlI1eUVEQmZwTHczUUJmN3FTcUFyRmlRQ0wrd3pBTgpCZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFPbXdtTDhzQ09Wek5FRkswYkNWRDEycFJUK0RaaFEwZTI5SnRtRVF6CmtJbW96T3Z4MkwwS0t6Q04vTEpPSjhzaURiRVVGU09NVlE3MURnZlpkTURKR2NzWE9seDdTVm9QNlcvNHcyTTIKMlROWUR0K0lFVUdoOUd0ZEdrTjQ1VGdNaWFHdU5JQUdnQktUQTBhMHZvQTJMZUQvZEV5UFRqcTlvdzJISzRIYwoxT3RETENId3dLT1lmSStobFNrempXZjFYcTM2bUg4ZGcvc2d3ai9KK1l3cXZlSVEzRU9SaFBtaE1QYzBMNThJClp1Q2lvZFVBek9ma0I3b0YrV0lOU2V2bjVzckxXYUlEL3J5c3NFRFVCQW9DVUxzN2ZTclJTSFFvcENuRC9pcU0KclVCQ21GdTVqVk1pYk0xem0zOVlRSm9hSmI3L1hJeTBFcTVhOHBVWkhuS1d4UT09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
+ caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURIVENDQWdXZ0F3SUJBZ0lRUjcrZVJ2cGdObENJRmExZklLNEFSREFOQmdrcWhraUc5dzBCQVFzRkFEQVoKTVJjd0ZRWURWUVFERXc1MmJTMXZjR1Z5WVhSdmNpMWpZVEFlRncweU5EQTRNall5TVRBM01qUmFGdzB6TkRBNApNalF5TVRBM01qUmFNQmt4RnpBVkJnTlZCQU1URG5adExXOXdaWEpoZEc5eUxXTmhNSUlCSWpBTkJna3Foa2lHCjl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUFvTTlJYzFSUkFhRnBBZmQzZ3dsNEtrbFR2QUJvTXp1NlF6RUQKVDhIMTVKblJadEhoUVczU0l2MUNWYXJnb092T2dUbkY0d3pPQ0dxTkRWcEE3MVJleUlzOENTY05UQ051cTNGMgpkK2k5UE5Peko5c1d6ekhOSitQZFJzNHJWVXZIWVVUS05yYThtSnJRc0JabVNpRTc0SEhPckpqVnlwcDM5anZPClk0Rm1ZWXEwRGI1dEplc3JzbkY3UDZkWW5sYWk2TFRHSGRGZ29ZZlJ0a3ZoTmVRTHVIRU9rQTM1M0hOTnExdkgKZkZvTjF4ZkV6MlZ1Y3A1WnYvNHNHSmJOUHFRZlhDcGUxZDhpSlBRVDFpQnhNZmtCYWVoZ2YzMGUvYm5SQi9ZVgpyM0JQY0Q0ZjBHOUZYMWRuUi9RcVkxQ3pOYnZUZFhoTGpEYzBZTzROeUUzazE3UUsrd0lEQVFBQm8yRXdYekFPCkJnTlZIUThCQWY4RUJBTUNBcVF3SFFZRFZSMGxCQll3RkFZSUt3WUJCUVVIQXdFR0NDc0dBUVVGQndNQ01BOEcKQTFVZEV3RUIvd1FGTUFNQkFmOHdIUVlEVlIwT0JCWUVGSUhpUEdlUTBmZmg1V2FCVWg4M0pLWlJxWUlnTUEwRwpDU3FHU0liM0RRRUJDd1VBQTRJQkFRQitBUUR3ZlZMZVprSnVwOGpwMnFJTjViQ2pqWDNzNWFKNDBab3RzZzF4CmpZb2c3UEhKd0ZDS2lteEJxbEJDZHRhSHVwMngyekpnRjV1UTM0SGhqVGdaemFoOWg3Wk1NZXgzQy9GRE1Mc0MKTVFzbi9rUktLZWZSRlZVaW5vSnd2OHpLQjJRK2s1bTZSSURVYjJVcjdMT3NsbWxnUlJTaEgxM3RtTG5JYzJaSQpXalkrQ2hoOXlRdnVxbHZNQW9aR2x2NzdyWTIvRC80K05LMWhnc01XTnBqMlIvSFd6VEpSZ2M0bmxmQlFUaGR1CjQ4UThYMlhNVFBzRXEzSGg4MXBzVmVSTUV4YUZZTHhRYlNzelo4MXVZWmtkSkF5V25TVmxIdHhXS1FNb2lNM2sKUWFqQnpwQTk1T3hJa2RrYzZLUTBkVnlKWXRFc1B5NzIvQXV6VDc1YlM1M3UKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
failurePolicy: Fail
name: vlogs.victoriametrics.com
admissionReviewVersions:
- v1
- v1beta1
sideEffects: None
@@ -38,13 +38,13 @@
- vlogs
- clientConfig:
service:
namespace: observability
name: victoria-metrics-victoria-metrics-operator
path: /validate-operator-victoriametrics-com-v1beta1-vmagent
- caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURIakNDQWdhZ0F3SUJBZ0lSQVB3a0hwV0hoVXErYm1ydWlLRkZocmt3RFFZSktvWklodmNOQVFFTEJRQXcKR1RFWE1CVUdBMVVFQXhNT2RtMHRiM0JsY21GMGIzSXRZMkV3SGhjTk1qUXdPREkyTWpFd056STBXaGNOTXpRdwpPREkwTWpFd056STBXakFaTVJjd0ZRWURWUVFERXc1MmJTMXZjR1Z5WVhSdmNpMWpZVENDQVNJd0RRWUpLb1pJCmh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBS3dGMXV4Smx0ZzMxdmhBMTgrVkZ3bmdsb2xZaVRiOWd2WEcKSWdXa1dyaVlJNGpXaUJxK1p0WmhRQWhITGhDWjJRMjZmSkdENktLaGVVY2Z3SUtIQVNEd1BORlNuczF5N3kzUApRbTlOakVuOFI2VVNBaVVoUGlRWnVielFMK3IvSjI3YkNHWDRiSXdRZktab29DMGV6K3BJNXAybFZwNGtXVHE4ClhOMHhVeFB4OUNNci9BMnJtVnBDWlBXQXdyalUwL01qajFsWmU1WUJNMEdveU9FZXI0UmZ1T3EwNkFqK0k1QVUKaGJKR2FGNEJSVnpjSFVwWmZQeW5Qb3NacWlXOFNSa1duYjlNYzQ2a210QXQyV3JQTTZpU0o4bXRkVWtaN1E0QgpzMHUzd1JzSUg4NEQwOE10cGxiU3ZTc2FSRDJlSjdUSjdhc3c0TTU3eWpQOFpxK2paVGNDQXdFQUFhTmhNRjh3CkRnWURWUjBQQVFIL0JBUURBZ0trTUIwR0ExVWRKUVFXTUJRR0NDc0dBUVVGQndNQkJnZ3JCZ0VGQlFjREFqQVAKQmdOVkhSTUJBZjhFQlRBREFRSC9NQjBHQTFVZERnUVdCQlI1eUVEQmZwTHczUUJmN3FTcUFyRmlRQ0wrd3pBTgpCZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFPbXdtTDhzQ09Wek5FRkswYkNWRDEycFJUK0RaaFEwZTI5SnRtRVF6CmtJbW96T3Z4MkwwS0t6Q04vTEpPSjhzaURiRVVGU09NVlE3MURnZlpkTURKR2NzWE9seDdTVm9QNlcvNHcyTTIKMlROWUR0K0lFVUdoOUd0ZEdrTjQ1VGdNaWFHdU5JQUdnQktUQTBhMHZvQTJMZUQvZEV5UFRqcTlvdzJISzRIYwoxT3RETENId3dLT1lmSStobFNrempXZjFYcTM2bUg4ZGcvc2d3ai9KK1l3cXZlSVEzRU9SaFBtaE1QYzBMNThJClp1Q2lvZFVBek9ma0I3b0YrV0lOU2V2bjVzckxXYUlEL3J5c3NFRFVCQW9DVUxzN2ZTclJTSFFvcENuRC9pcU0KclVCQ21GdTVqVk1pYk0xem0zOVlRSm9hSmI3L1hJeTBFcTVhOHBVWkhuS1d4UT09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
+ caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURIVENDQWdXZ0F3SUJBZ0lRUjcrZVJ2cGdObENJRmExZklLNEFSREFOQmdrcWhraUc5dzBCQVFzRkFEQVoKTVJjd0ZRWURWUVFERXc1MmJTMXZjR1Z5WVhSdmNpMWpZVEFlRncweU5EQTRNall5TVRBM01qUmFGdzB6TkRBNApNalF5TVRBM01qUmFNQmt4RnpBVkJnTlZCQU1URG5adExXOXdaWEpoZEc5eUxXTmhNSUlCSWpBTkJna3Foa2lHCjl3MEJBUUVGQUFPQ0FROEFNSUlCQ2dLQ0FRRUFvTTlJYzFSUkFhRnBBZmQzZ3dsNEtrbFR2QUJvTXp1NlF6RUQKVDhIMTVKblJadEhoUVczU0l2MUNWYXJnb092T2dUbkY0d3pPQ0dxTkRWcEE3MVJleUlzOENTY05UQ051cTNGMgpkK2k5UE5Peko5c1d6ekhOSitQZFJzNHJWVXZIWVVUS05yYThtSnJRc0JabVNpRTc0SEhPckpqVnlwcDM5anZPClk0Rm1ZWXEwRGI1dEplc3JzbkY3UDZkWW5sYWk2TFRHSGRGZ29ZZlJ0a3ZoTmVRTHVIRU9rQTM1M0hOTnExdkgKZkZvTjF4ZkV6MlZ1Y3A1WnYvNHNHSmJOUHFRZlhDcGUxZDhpSlBRVDFpQnhNZmtCYWVoZ2YzMGUvYm5SQi9ZVgpyM0JQY0Q0ZjBHOUZYMWRuUi9RcVkxQ3pOYnZUZFhoTGpEYzBZTzROeUUzazE3UUsrd0lEQVFBQm8yRXdYekFPCkJnTlZIUThCQWY4RUJBTUNBcVF3SFFZRFZSMGxCQll3RkFZSUt3WUJCUVVIQXdFR0NDc0dBUVVGQndNQ01BOEcKQTFVZEV3RUIvd1FGTUFNQkFmOHdIUVlEVlIwT0JCWUVGSUhpUEdlUTBmZmg1V2FCVWg4M0pLWlJxWUlnTUEwRwpDU3FHU0liM0RRRUJDd1VBQTRJQkFRQitBUUR3ZlZMZVprSnVwOGpwMnFJTjViQ2pqWDNzNWFKNDBab3RzZzF4CmpZb2c3UEhKd0ZDS2lteEJxbEJDZHRhSHVwMngyekpnRjV1UTM0SGhqVGdaemFoOWg3Wk1NZXgzQy9GRE1Mc0MKTVFzbi9rUktLZWZSRlZVaW5vSnd2OHpLQjJRK2s1bTZSSURVYjJVcjdMT3NsbWxnUlJTaEgxM3RtTG5JYzJaSQpXalkrQ2hoOXlRdnVxbHZNQW9aR2x2NzdyWTIvRC80K05LMWhnc01XTnBqMlIvSFd6VEpSZ2M0bmxmQlFUaGR1CjQ4UThYMlhNVFBzRXEzSGg4MXBzVmVSTUV4YUZZTHhRYlNzelo4MXVZWmtkSkF5V25TVmxIdHhXS1FNb2lNM2sKUWFqQnpwQTk1T3hJa2RrYzZLUTBkVnlKWXRFc1B5NzIvQXV6VDc1YlM1M3UKLS0tLS1FTkQgQ0VSVElGSUNBVEUtLS0tLQo=
failurePolicy: Fail
name: vmagent.victoriametrics.com
admissionReviewVersions:
- v1
- v1beta1
sideEffects: None
@@ -66,13 +66,13 @@
- vmagents
- clientConfig:
service:
namespace: observability
name: victoria-metrics-victoria-metrics-operator
path: /validate-operator-victoriametrics-com-v1beta1-vmalert
- caBundle: LS0tLS1CRUdJTiBDRVJUSUZJQ0FURS0tLS0tCk1JSURIakNDQWdhZ0F3SUJBZ0lSQVB3a0hwV0hoVXErYm1ydWlLRkZocmt3RFFZSktvWklodmNOQVFFTEJRQXcKR1RFWE1CVUdBMVVFQXhNT2RtMHRiM0JsY21GMGIzSXRZMkV3SGhjTk1qUXdPREkyTWpFd056STBXaGNOTXpRdwpPREkwTWpFd056STBXakFaTVJjd0ZRWURWUVFERXc1MmJTMXZjR1Z5WVhSdmNpMWpZVENDQVNJd0RRWUpLb1pJCmh2Y05BUUVCQlFBRGdnRVBBRENDQVFvQ2dnRUJBS3dGMXV4Smx0ZzMxdmhBMTgrVkZ3bmdsb2xZaVRiOWd2WEcKSWdXa1dyaVlJNGpXaUJxK1p0WmhRQWhITGhDWjJRMjZmSkdENktLaGVVY2Z3SUtIQVNEd1BORlNuczF5N3kzUApRbTlOakVuOFI2VVNBaVVoUGlRWnVielFMK3IvSjI3YkNHWDRiSXdRZktab29DMGV6K3BJNXAybFZwNGtXVHE4ClhOMHhVeFB4OUNNci9BMnJtVnBDWlBXQXdyalUwL01qajFsWmU1WUJNMEdveU9FZXI0UmZ1T3EwNkFqK0k1QVUKaGJKR2FGNEJSVnpjSFVwWmZQeW5Qb3NacWlXOFNSa1duYjlNYzQ2a210QXQyV3JQTTZpU0o4bXRkVWtaN1E0QgpzMHUzd1JzSUg4NEQwOE10cGxiU3ZTc2FSRDJlSjdUSjdhc3c0TTU3eWpQOFpxK2paVGNDQXdFQUFhTmhNRjh3CkRnWURWUjBQQVFIL0JBUURBZ0trTUIwR0ExVWRKUVFXTUJRR0NDc0dBUVVGQndNQkJnZ3JCZ0VGQlFjREFqQVAKQmdOVkhSTUJBZjhFQlRBREFRSC9NQjBHQTFVZERnUVdCQlI1eUVEQmZwTHczUUJmN3FTcUFyRmlRQ0wrd3pBTgpCZ2txaGtpRzl3MEJBUXNGQUFPQ0FRRUFPbXdtTDhzQ09Wek5FRkswYkNWRDEycFJUK0RaaFEwZTI5SnRtRVF6CmtJbW96T3Z4MkwwS0t6Q04vTEpPSjhzaURiRVVGU09NVlE3MURnZlpkTURKR2NzWE9seDdTVm9QNlcvNHcyTTIKMlROWUR0K0lFVUdoOUd0ZEdrTjQ1VGdNaWFHdU5JQUdnQktUQTBhMHZvQTJMZUQvZEV5UFRqcTlvdzJISzRIYwoxT3RETENId3dLT1lmSStobFNrempXZjFYcTM2bUg4ZGcvc2d3ai9KK1l3cXZlSVEzRU9SaFBtaE1QYzBMNThJClp1Q2lvZFVBek9ma0I3b0YrV0lOU2V2bjVzckxXYUlEL3J5c3NFRFVCQW9DVUxzN2ZTclJTSFFvcENuRC9pcU0KclVCQ21GdTVqVk1pYk0xem0zOVlRSm9hSmI3L1hJeTBFcTVhOHBVWkhuS1d4UT09Ci0tLS0tRU5EIENFUlRJRklDQVRFLS0tLS0K
[Diff truncated by flux-local]
--- HelmRelease: observability/victoria-metrics ServiceAccount: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
+++ HelmRelease: observability/victoria-metrics ServiceAccount: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
@@ -8,8 +8,8 @@
app.kubernetes.io/name: victoria-metrics-operator
app.kubernetes.io/instance: victoria-metrics
app.kubernetes.io/managed-by: Helm
annotations:
helm.sh/hook: pre-delete
helm.sh/hook-weight: '-5'
- helm.sh/hook-delete-policy: hook-succeeded
+ helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
--- HelmRelease: observability/victoria-metrics ClusterRole: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
+++ HelmRelease: observability/victoria-metrics ClusterRole: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
@@ -8,23 +8,24 @@
app.kubernetes.io/name: victoria-metrics-operator
app.kubernetes.io/instance: victoria-metrics
app.kubernetes.io/managed-by: Helm
annotations:
helm.sh/hook: pre-delete
helm.sh/hook-weight: '-5'
- helm.sh/hook-delete-policy: hook-succeeded
+ helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
rules:
- apiGroups:
- operator.victoriametrics.com
resources:
- - vmagents
- - vmalertmanagers
- - vmalerts
- - vmauths
- - vmclusters
- - vmsingles
+ - vlogs
+ - vmagent
+ - vmalert
+ - vmalertmanager
+ - vmalertmanagerconfig
+ - vmauth
+ - vmcluster
+ - vmrule
+ - vmsingle
+ - vmuser
verbs:
- - get
- - list
- - watch
- - delete
+ - '*'
--- HelmRelease: observability/victoria-metrics ClusterRoleBinding: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
+++ HelmRelease: observability/victoria-metrics ClusterRoleBinding: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
@@ -7,14 +7,14 @@
labels:
app.kubernetes.io/name: victoria-metrics-operator
app.kubernetes.io/instance: victoria-metrics
app.kubernetes.io/managed-by: Helm
annotations:
helm.sh/hook: pre-delete
- helm.sh/hook-weight: '-5'
- helm.sh/hook-delete-policy: hook-succeeded
+ helm.sh/hook-weight: '-4'
+ helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: victoria-metrics-victoria-metrics-operator-cleanup-hook
subjects:
- kind: ServiceAccount
--- HelmRelease: observability/victoria-metrics Job: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
+++ HelmRelease: observability/victoria-metrics Job: observability/victoria-metrics-victoria-metrics-operator-cleanup-hook
@@ -7,14 +7,14 @@
labels:
app.kubernetes.io/name: victoria-metrics-operator
app.kubernetes.io/instance: victoria-metrics
app.kubernetes.io/managed-by: Helm
annotations:
helm.sh/hook: pre-delete
- helm.sh/hook-weight: '-5'
- helm.sh/hook-delete-policy: hook-succeeded
+ helm.sh/hook-weight: '-3'
+ helm.sh/hook-delete-policy: before-hook-creation,hook-succeeded
spec:
template:
metadata:
name: victoria-metrics
labels:
app.kubernetes.io/name: victoria-metrics-operator
@@ -32,11 +32,11 @@
memory: 256Mi
requests:
cpu: 100m
memory: 56Mi
args:
- delete
- - vmagents,vmalertmanagers,vmalerts,vmauths,vmclusters,vmsingles
+ - vlogs,vmagents,vmalertmanagers,vmalerts,vmauths,vmclusters,vmsingles,vmusers
- --all
- --ignore-not-found=true
restartPolicy: OnFailure
This PR contains the following updates:
0.25.3
-> 0.25.5
Release Notes
VictoriaMetrics/helm-charts (victoria-metrics-k8s-stack)
### [`v0.25.5`](https://togithub.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-k8s-stack-0.25.5) [Compare Source](https://togithub.com/VictoriaMetrics/helm-charts/compare/victoria-metrics-k8s-stack-0.25.4...victoria-metrics-k8s-stack-0.25.5) ### Release notes for version 0.25.5 **Release date:** 2024-08-26 ![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion\&message=v1.102.1\&color=success\&logo=) ![Helm: v3](https://img.shields.io/static/v1?label=Helm\&message=v3\&color=informational\&logo=helm) - TODO ### [`v0.25.4`](https://togithub.com/VictoriaMetrics/helm-charts/releases/tag/victoria-metrics-k8s-stack-0.25.4) [Compare Source](https://togithub.com/VictoriaMetrics/helm-charts/compare/victoria-metrics-k8s-stack-0.25.3...victoria-metrics-k8s-stack-0.25.4) ### Release notes for version 0.25.4 **Release date:** 2024-08-26 ![AppVersion: v1.102.1](https://img.shields.io/static/v1?label=AppVersion\&message=v1.102.1\&color=success\&logo=) ![Helm: v3](https://img.shields.io/static/v1?label=Helm\&message=v3\&color=informational\&logo=helm) - updates operator to [v0.47.2](https://togithub.com/VictoriaMetrics/operator/releases/tag/v0.47.2) - kube-state-metrics - 5.16.4 -> 5.25.1 - prometheus-node-exporter - 4.27.0 -> 4.29.0 - grafana - 8.3.8 -> 8.4.7 - added configurable `.Values.global.clusterLabel` to all alerting and recording rules `by` and `on` expressionsConfiguration
📅 Schedule: Branch creation - "after 8am every weekday,before 7pm every weekday" in timezone Europe/Berlin, Automerge - At any time (no schedule defined).
🚦 Automerge: Disabled by config. Please merge this manually once you are satisfied.
♻ Rebasing: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.
🔕 Ignore: Close this PR and you won't be reminded about this update again.
This PR was generated by Mend Renovate. View the repository job log.