grafana / kubernetes-app

A set of dashboards and panels for Kubernetes.
https://grafana.com/plugins/grafana-kubernetes-app
Apache License 2.0

cronjobs.batch is forbidden #59

Closed: tangjiaxing669 closed this issue 5 years ago

tangjiaxing669 commented 5 years ago
[root@localhost prometheus]# kubectl version
Client Version: version.Info{Major:"1", Minor:"12", GitVersion:"v1.12.5", GitCommit:"51dd616cdd25d6ee22c83a858773b607328a18ec", GitTreeState:"clean", BuildDate:"2019-01-16T18:24:45Z", GoVersion:"go1.10.7", Compiler:"gc", Platform:"linux/amd64"}
Server Version: version.Info{Major:"1", Minor:"12", GitVersion:"v1.12.5", GitCommit:"51dd616cdd25d6ee22c83a858773b607328a18ec", GitTreeState:"clean", BuildDate:"2019-01-16T18:14:49Z", GoVersion:"go1.10.7", Compiler:"gc", Platform:"linux/amd64"}

Image versions:

prom/prometheus:v2.4.3
grafana/grafana:5.3.4

Then I deployed the grafana-kubernetes-app plugin. Here is the ConfigMap for Prometheus:

[root@localhost prometheus]# cat prometheus-cm.yaml 
apiVersion: v1
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: monitoring
data:
  prometheus.yml: |
    global:
      scrape_interval: 15s
      scrape_timeout: 15s
    scrape_configs:
    - job_name: 'prometheus'
      static_configs:
      - targets: ['localhost:9090']

    - job_name: 'kubernetes-nodes'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:9100'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)

    - job_name: 'kubernetes-kubelet'
      kubernetes_sd_configs:
      - role: node
      relabel_configs:
      - source_labels: [__address__]
        regex: '(.*):10250'
        replacement: '${1}:10255'
        target_label: __address__
        action: replace
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)

    - job_name: 'kubernetes-cadvisor'
      kubernetes_sd_configs:
      - role: node
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - action: labelmap
        regex: __meta_kubernetes_node_label_(.+)
      - target_label: __address__
        replacement: kubernetes.default.svc:443
      - source_labels: [__meta_kubernetes_node_name]
        regex: (.+)
        target_label: __metrics_path__
        replacement: /api/v1/nodes/${1}/proxy/metrics/cadvisor

    - job_name: 'kubernetes-apiservers'
      kubernetes_sd_configs:
      - role: endpoints
      scheme: https
      tls_config:
        ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt
      bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token
      relabel_configs:
      - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name]
        action: keep
        regex: default;kubernetes;https

    - job_name: 'kubernetes-service-endpoints'
      kubernetes_sd_configs:
      - role: endpoints
      relabel_configs:
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scrape]
        action: keep
        regex: true
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_scheme]
        action: replace
        target_label: __scheme__
        regex: (https?)
      - source_labels: [__meta_kubernetes_service_annotation_prometheus_io_path]
        action: replace
        target_label: __metrics_path__
        regex: (.+)
      - source_labels: [__address__, __meta_kubernetes_service_annotation_prometheus_io_port]
        action: replace
        target_label: __address__
        regex: ([^:]+)(?::\d+)?;(\d+)
        replacement: $1:$2
      - action: labelmap
        regex: __meta_kubernetes_service_label_(.+)
      - source_labels: [__meta_kubernetes_namespace]
        action: replace
        target_label: kubernetes_namespace
      - source_labels: [__meta_kubernetes_service_name]
        action: replace
        target_label: kubernetes_name
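
(Note: the kubernetes-service-endpoints job above only keeps Services that opt in via the prometheus.io/scrape annotation. A Service exposing kube-state-metrics would therefore need annotations along these lines; the name, selector, and port here are illustrative, not taken from this cluster:)

apiVersion: v1
kind: Service
metadata:
  name: kube-state-metrics
  namespace: kube-system
  annotations:
    prometheus.io/scrape: "true"   # matched by the keep rule in the job above
    prometheus.io/port: "8080"     # rewrites __address__ to this port
spec:
  selector:
    k8s-app: kube-state-metrics    # illustrative selector
  ports:
  - name: http-metrics
    port: 8080
    targetPort: 8080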

After completion:

[root@localhost prometheus]# kubectl get pod -n kube-system
NAME                                    READY   STATUS    RESTARTS   AGE
coredns-779ffd89bd-rvd42                1/1     Running   0          3h29m
kube-state-metrics-fcdc7d964-qmbfg      1/1     Running   0          87m
kubernetes-dashboard-659798bd99-j6rj2   1/1     Running   0          3h26m
node-exporter-bhvvt                     1/1     Running   0          87m
node-exporter-jrncb                     1/1     Running   0          87m
node-exporter-q8tzh                     1/1     Running   0          87m
node-exporter-r52fg                     1/1     Running   0          87m
[root@localhost prometheus]# kubectl get pod -n monitoring
NAME                          READY   STATUS      RESTARTS   AGE
grafana-c7bf74c8-sbmmp        1/1     Running     0          93m
grafana-chown-rd96q           0/1     Completed   0          99m
prometheus-7c958795dd-6ndfj   1/1     Running     0          106m

When I visit Grafana, only the k8s-container dashboard has data. I also see the following errors in the kube-state-metrics pod's logs:

E0204 09:33:42.538610       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/pod.go:187: Failed to list *v1.Pod: pods is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "pods" in API group "" at the cluster scope
E0204 09:33:42.738156       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/persistentvolumeclaim.go:78: Failed to list *v1.PersistentVolumeClaim: persistentvolumeclaims is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "persistentvolumeclaims" in API group "" at the cluster scope
E0204 09:33:42.923342       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/daemonset.go:82: Failed to list *v1beta1.DaemonSet: daemonsets.extensions is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "daemonsets" in API group "extensions" at the cluster scope
E0204 09:33:42.924153       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/cronjob.go:93: Failed to list *v2alpha1.CronJob: cronjobs.batch is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "cronjobs" in API group "batch" at the cluster scope
E0204 09:33:42.925186       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/replicaset.go:87: Failed to list *v1beta1.ReplicaSet: replicasets.extensions is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "replicasets" in API group "extensions" at the cluster scope
E0204 09:33:42.937971       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/namespace.go:80: Failed to list *v1.Namespace: namespaces is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "namespaces" in API group "" at the cluster scope
E0204 09:33:43.138574       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/node.go:142: Failed to list *v1.Node: nodes is forbidden: User "system:serviceaccount:kube-system:default" cannot list resource "nodes" in API group "" at the cluster scope
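
(This kind of denial can be checked directly with kubectl auth can-i, independent of the pod logs; for example:)

kubectl auth can-i list pods --as=system:serviceaccount:kube-system:default
kubectl auth can-i list cronjobs.batch --as=system:serviceaccount:kube-system:default
# Both print "no" until a ClusterRoleBinding grants the default
# service account list rights on those resources.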

Can anyone help? Thanks.

sensay-nelson commented 5 years ago

@tangjiaxing669 With regard to kube-state-metrics, you likely need a ServiceAccount associated with the deployment. See https://github.com/sensay-nelson/kubernetes-app/pull/1/files for an example.

After that, you may also need to adjust the variables referenced in each dashboard ($node vs. $instance, etc.); I had to.
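
A minimal sketch of that association, with illustrative names (the real files are in the PR linked above); the role grants only the list/watch verbs the collectors need:

apiVersion: v1
kind: ServiceAccount
metadata:
  name: kube-state-metrics
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: kube-state-metrics
rules:
- apiGroups: [""]
  resources: ["pods", "nodes", "namespaces", "persistentvolumeclaims"]
  verbs: ["list", "watch"]
- apiGroups: ["extensions", "apps"]
  resources: ["daemonsets", "deployments", "replicasets"]
  verbs: ["list", "watch"]
- apiGroups: ["batch"]
  resources: ["cronjobs", "jobs"]
  verbs: ["list", "watch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: kube-state-metrics
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: kube-state-metrics
subjects:
- kind: ServiceAccount
  name: kube-state-metrics
  namespace: kube-system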

tangjiaxing669 commented 5 years ago

@sensay-nelson Thanks very much. I tried your suggestion, but I still get the following errors:

E0205 04:37:46.934888       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/cronjob.go:93: Failed to list *v2alpha1.CronJob: the server could not find the requested resource
E0205 04:37:47.935802       1 reflector.go:205] k8s.io/kube-state-metrics/collectors/cronjob.go:93: Failed to list *v2alpha1.CronJob: the server could not find the requested resource
[... the same line repeats every second through 04:37:59 ...]
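
(That message is no longer an RBAC denial: the collector is asking for the batch/v2alpha1 CronJob API, and the apiserver is not serving it. This can be confirmed with, e.g.:)

kubectl api-versions | grep batch
# If batch/v2alpha1 is absent from the output, any client requesting
# v2alpha1 CronJobs gets "the server could not find the requested resource".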

The following is my RBAC config file:

apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus
  namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: prometheus
rules:
- apiGroups: [""]
  resources:
  - configmaps
  - secrets
  - nodes
  - nodes/metrics
  - nodes/stats
  - nodes/log
  - nodes/spec
  - nodes/proxy
  - pods
  - services
  - resourcequotas
  - replicationcontrollers
  - limitranges
  - persistentvolumeclaims
  - persistentvolumes
  - namespaces
  - endpoints
  - proxy
  verbs:
  - list
  - watch
  - get
- apiGroups:
  - extensions
  resources:
  - daemonsets
  - deployments
  - replicasets
  - ingresses
  verbs:
  - list
  - watch
- apiGroups:
  - apps
  resources:
  - statefulsets
  - daemonsets
  - deployments
  - replicasets
  verbs:
  - list
  - watch
- apiGroups:
  - batch
  resources:
  - cronjobs
  - jobs
  verbs:
  - list
  - watch
- apiGroups:
  - autoscaling
  resources:
  - horizontalpodautoscalers
  verbs:
  - list
  - watch
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
- nonResourceURLs: ["/metrics"]
  verbs: ["get"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: prometheus
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: prometheus
subjects:
- kind: ServiceAccount
  name: prometheus
  namespace: kube-system
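
(For this ClusterRole to be used by a workload, the pod spec has to reference the ServiceAccount explicitly; otherwise the pod keeps running as the namespace's default account, as in the earlier forbidden errors. In a Deployment, that is a fragment like the following, using the names from the file above:)

spec:
  template:
    spec:
      serviceAccountName: prometheus  # the ServiceAccount defined above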

I also tried binding the "system:serviceaccount:kube-system:default" user to cluster-admin, and the result is still the same.
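
(That binding was presumably created with a one-liner like the following; the binding name is illustrative, shown only to make the attempt reproducible:)

kubectl create clusterrolebinding default-cluster-admin \
  --clusterrole=cluster-admin \
  --serviceaccount=kube-system:default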

sensay-nelson commented 5 years ago

Yeah, that is an outstanding issue, specifically with CronJobs: the app still requests the v2alpha1 API, which current apiservers no longer serve by default. I believe the change is needed in the app, not just in config, I'm afraid.
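
In the meantime, two possible workarounds, depending on your cluster (both hedged; verify against your versions). Either re-enable the alpha API on the apiserver, or start kube-state-metrics without the cronjobs collector:

# kube-apiserver: serve the batch/v2alpha1 API again
--runtime-config=batch/v2alpha1=true

# kube-state-metrics: enable only the collectors you need, omitting cronjobs
# (--collectors takes a comma-separated whitelist)
--collectors=daemonsets,deployments,namespaces,nodes,pods,replicasets,services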

tangjiaxing669 commented 5 years ago

OK, in that case I'll close the issue.