aibangjuxin opened 3 weeks ago
provider "google" { credentials = file("./sa.json") project = "${var.project}" region = "asia-east1" } resource "google_container_cluster" "private_cluster" { provider = google-beta project = "${var.project}" name = "private-cluster" location = "asia-east1" network = google_compute_network.vpc_network.self_link subnetwork = google_compute_subnetwork.subnet.self_link private_cluster_config { enable_private_nodes = true enable_private_endpoint = true master_ipv4_cidr_block = "172.16.0.0/28" } master_authorized_networks_config { cidr_blocks { cidr_block = "192.168.64.0/24" display_name = "any" } } ip_allocation_policy {} remove_default_node_pool = true initial_node_count = 1 workload_identity_config { workload_pool = "${var.project}.svc.id.goog" } monitoring_config { enable_components = [ "SYSTEM_COMPONENTS", "APISERVER", "CONTROLLER_MANAGER", "SCHEDULER" ] managed_prometheus { enabled = true } } } resource "google_container_node_pool" "private_cluster_node_pool" { provider = google-beta project = "${var.project}" name = "private-cluster-node-pool" location = "asia-east1" cluster = google_container_cluster.private_cluster.name node_count = 1 node_config { machine_type = "n1-standard-1" oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] metadata = { disable-legacy-endpoints = "true" } tags = ["no-external-ip"] } } resource "google_compute_network" "vpc_network" { name = "gke-network" auto_create_subnetworks = false } resource "google_compute_subnetwork" "subnet" { name = "subnet" ip_cidr_range = "192.168.64.0/24" region = "asia-east1" network = google_compute_network.vpc_network.self_link } resource "google_compute_instance" "squid_proxy" { name = "squid-proxy" machine_type = "n1-standard-1" zone = "asia-east1-a" boot_disk { initialize_params { image = "ubuntu-os-cloud/ubuntu-2004-lts" } } network_interface { network = google_compute_network.vpc_network.name subnetwork = google_compute_subnetwork.subnet.name access_config { } } metadata_startup_script = <<-EOT #! /bin/bash sudo apt-get update sudo apt-get install -y squid sudo sed -i 's/http_access deny all/http_access allow all/' /etc/squid/squid.conf sudo systemctl restart squid EOT service_account { scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] } tags = ["squid-proxy"] } resource "google_compute_firewall" "no_external_ip_firewall" { name = "no-external-ip-firewall" network = google_compute_network.vpc_network.self_link priority = 65535 source_ranges = ["0.0.0.0/0"] target_tags = ["no-external-ip"] deny { protocol = "icmp" } deny { protocol = "tcp" } deny { protocol = "udp" } } resource "google_compute_firewall" "allow_ports_firewall" { name = "allow-squid" network = google_compute_network.vpc_network.name allow { protocol = "tcp" ports = ["3128"] } allow { protocol = "tcp" ports = ["22"] } source_ranges = ["0.0.0.0/0"] target_tags = ["squid-proxy"] description = "Allow traffic on port 3128 for Squid proxy from the GKE cluster subnet" } resource "google_compute_firewall" "allow_proxy_to_pod" { name = "allow-proxy-to-pod" network = google_compute_network.vpc_network.name allow { protocol = "tcp" ports = ["8080"] } allow { protocol = "tcp" ports = ["80"] } source_ranges = ["192.168.1.0/24"] description = "Allow traffic from proxy subnet to GKE Pods on port 8080" } variable "project" { default = "causal-hour-418204" type = string }
Set up Workload Identity for the Managed Prometheus collector: create the Google service account, let the `gmp-system/collector` Kubernetes service account impersonate it, grant it metric-write access, and annotate the KSA:

```bash
gcloud iam service-accounts create collector --project=causal-hour-418204

gcloud iam service-accounts add-iam-policy-binding collector@causal-hour-418204.iam.gserviceaccount.com \
  --member="serviceAccount:causal-hour-418204.svc.id.goog[gmp-system/collector]" \
  --role="roles/iam.workloadIdentityUser" \
  --project=causal-hour-418204

gcloud projects add-iam-policy-binding causal-hour-418204 \
  --member="serviceAccount:collector@causal-hour-418204.iam.gserviceaccount.com" \
  --role="roles/monitoring.metricWriter"

kubectl annotate serviceaccount collector --namespace gmp-system \
  iam.gke.io/gcp-service-account=collector@causal-hour-418204.iam.gserviceaccount.com
```
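As a quick sanity check (not part of the original steps), the Workload Identity binding can be read back from the GSA:

```bash
# Expect roles/iam.workloadIdentityUser bound to the gmp-system/collector KSA.
gcloud iam service-accounts get-iam-policy \
  collector@causal-hour-418204.iam.gserviceaccount.com \
  --project=causal-hour-418204
```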
Repeat for the Prometheus UI front end, which only needs read access to metrics:

```bash
gcloud iam service-accounts create prometheus-ui --project=causal-hour-418204

gcloud iam service-accounts add-iam-policy-binding prometheus-ui@causal-hour-418204.iam.gserviceaccount.com \
  --member="serviceAccount:causal-hour-418204.svc.id.goog[monitoring/prometheus-ui]" \
  --role="roles/iam.workloadIdentityUser" \
  --project=causal-hour-418204

gcloud projects add-iam-policy-binding causal-hour-418204 \
  --member="serviceAccount:prometheus-ui@causal-hour-418204.iam.gserviceaccount.com" \
  --role="roles/monitoring.viewer"

kubectl annotate serviceaccount prometheus-ui --namespace monitoring \
  iam.gke.io/gcp-service-account=prometheus-ui@causal-hour-418204.iam.gserviceaccount.com
```
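The annotation can likewise be verified on the Kubernetes side:

```bash
# The iam.gke.io/gcp-service-account annotation should point at the GSA.
kubectl get serviceaccount prometheus-ui -n monitoring -o jsonpath='{.metadata.annotations}'
```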
Deploy node-exporter

The manifests create the monitoring namespace and service account, a PSP ClusterRole and binding, the node-exporter DaemonSet, and a PodMonitoring resource so Managed Prometheus scrapes the exporter. The container and scrape-endpoint specs were truncated in the original post; the reconstructed parts are marked with comments.

```yaml
apiVersion: v1
kind: Namespace
metadata:
  name: monitoring
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: node-exporter
  namespace: monitoring
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: psp-node-exporter
  labels:
    app: node-exporter
rules:
# rule body reconstructed (truncated in the original); grants use of a
# PodSecurityPolicy named node-exporter
- apiGroups: ["policy"]
  resources: ["podsecuritypolicies"]
  verbs: ["use"]
  resourceNames: ["node-exporter"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: psp-node-exporter
  labels:
    app: node-exporter
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: psp-node-exporter
subjects:
- kind: ServiceAccount
  name: node-exporter
  namespace: monitoring
---
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: node-exporter
  namespace: monitoring
  labels:
    app: node-exporter
spec:
  selector:
    matchLabels:
      app: node-exporter
  updateStrategy:
    type: RollingUpdate
    rollingUpdate:
      maxUnavailable: 1
  template:
    metadata:
      labels:
        app: node-exporter
    spec:
      serviceAccountName: node-exporter
      securityContext:
        runAsNonRoot: true
        runAsUser: 65534
      # container spec reconstructed (truncated in the original); a typical
      # node-exporter container, image and version assumed
      containers:
      - name: node-exporter
        image: quay.io/prometheus/node-exporter:v1.6.1
        args:
        - --web.listen-address=:9100
        - --path.sysfs=/host/sys
        ports:
        - name: metrics
          containerPort: 9100
        volumeMounts:
        - name: sys
          mountPath: /host/sys
          readOnly: true
      volumes:
      - name: sys
        hostPath:
          path: /sys
---
apiVersion: monitoring.googleapis.com/v1
kind: PodMonitoring
metadata:
  name: node-exporter
  namespace: monitoring
spec:
  selector:
    matchLabels:
      app: node-exporter
  endpoints:
  # endpoint reconstructed (truncated in the original)
  - port: metrics
    interval: 30s
```
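Once applied, the DaemonSet rollout and the scrape target can be checked (the filename is assumed):

```bash
kubectl apply -f node-exporter.yaml
kubectl -n monitoring rollout status daemonset/node-exporter
kubectl -n monitoring get podmonitoring node-exporter   # CRD installed by managed collection
```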
Deploy Prometheus UI
```yaml
---
apiVersion: v1
kind: ServiceAccount
metadata:
  name: prometheus-ui
  namespace: monitoring
  annotations:
    iam.gke.io/gcp-service-account: prometheus-ui@causal-hour-418204.iam.gserviceaccount.com # replace with your project's GSA
---
apiVersion: apps/v1
kind: Deployment
metadata:
  name: frontend
  namespace: monitoring
spec:
  replicas: 1
  selector:
    matchLabels:
      app: frontend
  template:
    metadata:
      labels:
        app: frontend
    spec:
      serviceAccountName: prometheus-ui
      containers:
      - name: frontend
        image: gke.gcr.io/prometheus-engine/frontend:v0.5.0-gke.0
        args:
        - --web.listen-address=:9090
        - --query.project-id=causal-hour-418204
        ports:
        - name: web
          containerPort: 9090
        readinessProbe:
          httpGet:
            path: /-/ready
            port: web
        livenessProbe:
          httpGet:
            path: /-/healthy
            port: web
---
apiVersion: v1
kind: Service
metadata:
  name: frontend
  namespace: monitoring
spec:
  type: LoadBalancer
  selector:
    app: frontend
  ports:
  - name: web
    port: 9090
    targetPort: 9090
```
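Since the cluster is private, the LoadBalancer IP is only reachable from the VPC; port-forwarding is a handy alternative for spot checks:

```bash
kubectl -n monitoring port-forward svc/frontend 9090:9090
# then browse http://localhost:9090
```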
Deploy Grafana
```yaml
---
apiVersion: v1
kind: Secret
metadata:
  namespace: monitoring
  name: grafana
type: Opaque
data:
  admin-user: "YWRtaW4="         # base64-encoded "admin"
  admin-password: "ZGV2b3BzMTIz" # base64-encoded "devops123"
---
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: monitoring
  name: grafana
data:
  grafana.ini: |
    [analytics]
    reporting_enabled = false
    check_for_updates = true
    [log]
    mode = console
    [paths]
    data = /var/lib/grafana/data
    logs = /var/log/grafana
    plugins = /var/lib/grafana/plugins
    provisioning = /etc/grafana/provisioning
    [auth.anonymous]
    enabled = false
    [metrics]
    enabled = true
    disable_total_stats = false
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: datasources
  namespace: monitoring
data:
  datasources.yaml: |
    apiVersion: 1
    datasources:
    - name: PrometheusUI
      type: prometheus
      url: http://frontend.monitoring:9090
      isDefault: true
      jsonData:
        manageAlerts: false
        timeout: 60
---
apiVersion: apps/v1
kind: Deployment
metadata:
  namespace: monitoring
  name: grafana
spec:
  replicas: 1
  selector:
    matchLabels:
      app: grafana
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app: grafana
    spec:
      securityContext:
        fsGroup: 472
        runAsUser: 472
      containers:
      - name: grafana
        image: grafana/grafana:9.2.3
        imagePullPolicy: IfNotPresent
        volumeMounts:
        - name: config
          mountPath: /etc/grafana/grafana.ini
          subPath: grafana.ini
        - name: datasources
          mountPath: /etc/grafana/provisioning/datasources/datasources.yaml
          subPath: datasources.yaml
        ports:
        - name: grafana
          containerPort: 3000
          protocol: TCP
        env:
        - name: GF_SECURITY_ADMIN_USER
          valueFrom:
            secretKeyRef:
              name: grafana
              key: admin-user
        - name: GF_SECURITY_ADMIN_PASSWORD
          valueFrom:
            secretKeyRef:
              name: grafana
              key: admin-password
        livenessProbe:
          failureThreshold: 10
          httpGet:
            path: /api/health
            port: 3000
          initialDelaySeconds: 60
          timeoutSeconds: 30
        readinessProbe:
          httpGet:
            path: /api/health
            port: 3000
        resources:
          requests:
            cpu: 500m
            memory: 1Gi
          limits:
            cpu: 2000m
            memory: 4Gi
      volumes:
      - name: config
        configMap:
          name: grafana
      - name: datasources
        configMap:
          name: datasources
      # NB: the original listed a "dashboards" volume with no source; omitted
      # here because a volume needs a source and nothing mounts it
---
apiVersion: v1
kind: Service
metadata:
  namespace: monitoring
  name: grafana
  labels:
    app: grafana
spec:
  type: LoadBalancer
  ports:
  - name: service
    port: 3000
    protocol: TCP
  selector:
    app: grafana
```
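A minimal rollout sequence (the filename is assumed); the admin credentials come from the Secret above:

```bash
kubectl apply -f grafana.yaml
kubectl -n monitoring get svc grafana    # EXTERNAL-IP is the LoadBalancer address
# log in with admin / devops123
```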
Originally posted by @zking2000 in https://github.com/zking2000/NotePad/issues/18