# devstats-helm
DevStats deployment on Equinix Ubuntu 20.04 LTS bare metal Kubernetes using Helm.
This is deployed at https://teststats.cncf.io (test) and https://devstats.cncf.io (prod).
For notes on moving node storage to NVMe, see `NVME.md` (also covered at the end of this document).
- `apt update`, `apt upgrade`.
- `vim /etc/ssh/sshd_config`: add the line `PermitRootLogin yes`, change `PasswordAuthentication no` to `PasswordAuthentication yes`, then `sudo service sshd restart`.
- `swapoff -a`.
- `modprobe br_netfilter`, then:
```
cat <<EOF | tee /etc/sysctl.d/k8s.conf
net.bridge.bridge-nf-call-ip6tables = 1
net.bridge.bridge-nf-call-iptables = 1
EOF
sysctl --system
```
- Load the kernel modules required by containerd:
```
cat <<EOF | sudo tee /etc/modules-load.d/containerd.conf
overlay
br_netfilter
EOF
sudo modprobe overlay; sudo modprobe br_netfilter
```
- Set the sysctl parameters required by the CRI:
```
cat <<EOF | sudo tee /etc/sysctl.d/99-kubernetes-cri.conf
net.bridge.bridge-nf-call-iptables = 1
net.ipv4.ip_forward = 1
net.bridge.bridge-nf-call-ip6tables = 1
EOF
sudo sysctl --system
```
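An optional sanity check that the modules loaded and the sysctl values took effect:
```
lsmod | grep -E 'overlay|br_netfilter'                          # both modules should be listed
sysctl net.bridge.bridge-nf-call-iptables net.ipv4.ip_forward   # both should print 1
```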
- `sudo apt-get update && sudo apt-get install -y apt-transport-https ca-certificates curl software-properties-common`.
- `curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key --keyring /etc/apt/trusted.gpg.d/docker.gpg add -`.
- `sudo add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"`.
- `sudo apt-get update && sudo apt-get install -y containerd.io`.
- `sudo mkdir -p /etc/containerd`.
- `sudo containerd config default | sudo tee /etc/containerd/config.toml`.
- `sudo systemctl restart containerd`.
- `sudo systemctl enable containerd`.
- `vim /etc/containerd/config.toml`: search for `plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options` and set `SystemdCgroup = true`, so it looks like:
```
  base_runtime_spec = ""
  [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
    SystemdCgroup = true
[plugins."io.containerd.grpc.v1.cri".cni]
```
Then `service containerd restart`.
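To verify the systemd cgroup driver is actually configured (optional check):
```
# should print: SystemdCgroup = true
grep SystemdCgroup /etc/containerd/config.toml
```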
- `curl -LO "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl"`.
- `chmod +x ./kubectl; mv ./kubectl /usr/local/bin/kubectl; kubectl version --client; kubectl completion bash`.
- `vim ~/.bashrc`: uncomment the `. /etc/bash_completion` part, relogin, then `echo 'source <(kubectl completion bash)' >>~/.bashrc`, `kubectl completion bash >/etc/bash_completion.d/kubectl`.
- `echo 'alias k=kubectl' >>~/.bashrc; echo 'complete -F __start_kubectl k' >>~/.bashrc`.
- `curl -s https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -`, then:
```
cat <<EOF | sudo tee /etc/apt/sources.list.d/kubernetes.list
deb https://apt.kubernetes.io/ kubernetes-xenial main
EOF
apt-get update && apt-get install -y kubelet kubeadm kubectl
```
- `apt-mark hold kubelet kubeadm kubectl`, then `systemctl daemon-reload; systemctl restart kubelet`.
- Create the kubeadm config (the `featureGates` and `shutdownGracePeriod*` options are not tested yet):
```
cat <<EOF | sudo tee /etc/kubeadm_cgroup_driver.yml
apiVersion: kubeadm.k8s.io/v1beta2
kind: InitConfiguration
localAPIEndpoint:
  advertiseAddress: X.Y.Z.A1
---
apiVersion: kubeadm.k8s.io/v1beta2
kind: ClusterConfiguration
networking:
  podSubnet: '192.168.0.0/16'
featureGates:
  GracefulNodeShutdown: true
  DynamicKubeletConfig: true
---
apiVersion: kubelet.config.k8s.io/v1beta1
kind: KubeletConfiguration
cgroupDriver: systemd
shutdownGracePeriod: 60s
shutdownGracePeriodCriticalPods: 20s
EOF
```
- `apt install -y nfs-common net-tools`.
- Edit `/etc/hosts` and add:
```
X.Y.Z.A1 devstats-master
X.Y.Z.A2 devstats-node-0
X.Y.Z.A3 devstats-node-1
X.Y.Z.A4 devstats-node-2
```
- On the master: `kubeadm init --config /etc/kubeadm_cgroup_driver.yml`, then:
```
mkdir -p $HOME/.kube
sudo cp -i /etc/kubernetes/admin.conf $HOME/.kube/config
sudo chown $(id -u):$(id -g) $HOME/.kube/config
```
- Save the join command printed by `kubeadm init` as `join.sh` on the master and all nodes, then `chmod +x join.sh`. It looks something like:
```
#!/bin/bash
kubeadm join 10.13.13.0:1234 --token xxxxxx.yyyyyyyyyyyy --discovery-token-ca-cert-hash sha256:0123456789abcdef0
```
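If the original `kubeadm init` output is gone (bootstrap tokens expire, 24h by default), a fresh join command can be printed on the master:
```
kubeadm token create --print-join-command
```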
- Install Calico networking: `wget https://docs.projectcalico.org/manifests/calico.yaml; kubectl apply -f calico.yaml`.
- Allow scheduling on the master: `kubectl taint nodes --all node-role.kubernetes.io/master-`.
- `kubectl get po -A; kubectl get nodes`. Wait for all pods to be in the `Running` state.
- Run `./join.sh` on all nodes.
- Copy the kube config to each node - `sftp root@devstats-node-N`:
```
mkdir .kube
lcd .kube
cd .kube
mput config
```
- `k get node; service kubelet status`.
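Instead of interactive `sftp`, a small loop can push the config to all nodes at once (a sketch; node names follow the `/etc/hosts` entries above):
```
for node in devstats-node-0 devstats-node-1 devstats-node-2; do
  ssh root@$node 'mkdir -p ~/.kube'              # ensure the target directory exists
  scp ~/.kube/config root@$node:~/.kube/config   # copy the admin kubeconfig
done
```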
- `k get cm -n kube-system | grep kubeadm`, then `k -n kube-system edit cm kubeadm-config`: add the kubeadm options below the `networking` part of the `ClusterConfiguration` section, like this:
```
    networking:
      dnsDomain: cluster.local
      podSubnet: 192.168.0.0/16
      serviceSubnet: 10.96.0.0/12
    featureGates:
      GracefulNodeShutdown: true
      DynamicKubeletConfig: true
```
- `k get cm -n kube-system | grep kubelet`, then `k -n kube-system edit cm kubelet-config-1.20`: add the kubelet options below the cgroup driver in `KubeletConfiguration`, like this:
```
    cgroupDriver: systemd
    shutdownGracePeriod: 60s
    shutdownGracePeriodCriticalPods: 20s
    maxPods: 255
```
- `kube-apiserver`: `vim /etc/kubernetes/manifests/kube-apiserver.yaml`, add `- --feature-gates=GracefulNodeShutdown=True,DynamicKubeletConfig=True` so it looks like:
```
    - --tls-private-key-file=/etc/kubernetes/pki/apiserver.key
    - --feature-gates=GracefulNodeShutdown=True,DynamicKubeletConfig=True
    image: k8s.gcr.io/kube-apiserver:v1.20.0
```
- `kubelet`: run `service kubelet status` - you will see something like `--config=/var/lib/kubelet/config.yaml`. `vim /var/lib/kubelet/config.yaml` and put your options there (master and all nodes):
```
shutdownGracePeriod: 60s
shutdownGracePeriodCriticalPods: 20s
```
- `vim /var/lib/kubelet/kubeadm-flags.env`: update to something like (master and all nodes) `KUBELET_KUBEADM_ARGS="... --feature-gates=GracefulNodeShutdown=True,DynamicKubeletConfig=True"`.
- `k get no; k edit node node-name`: add under the `spec` section, so it looks like:
```
spec:
  configSource:
    configMap:
      name: kubelet-config-1.20
      namespace: kube-system
      kubeletConfigKey: kubelet
  podCIDR: 192.168.0.0/24
```
- `service kubelet restart`.
- Confirm that `kubelet` and `kube-apiserver` are using the feature gates: `ps aux | grep kube-apiserver | grep feature-gates`, `service kubelet status`.
- Label the nodes: `for node in devstats-master devstats-node-0 devstats-node-1 devstats-node-2; do k label node $node node=devstats-app; k label node $node node2=devstats-db; done`.
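To confirm both labels landed on every node (optional check):
```
k get nodes --show-labels | grep devstats
```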
- Install Helm: `wget https://get.helm.sh/helm-v3.4.2-linux-amd64.tar.gz; tar zxvf helm-v3.4.2-linux-amd64.tar.gz; mv linux-amd64/helm /usr/local/bin; rm -rf linux-amd64/ helm-v3.4.2-linux-amd64.tar.gz`.
- `helm repo add stable https://charts.helm.sh/stable`.
- `helm repo add openebs https://openebs.github.io/charts`.
- `helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx`.
- `helm repo update`.
- Make sure the `/var/openebs` directory on all nodes is placed on the physical volume you want to use for local storage. You can have a huge NVMe disk mounted on `/disk`, for instance; in that case: `mv /var/openebs /disk/openebs; ln -s /disk/openebs /var/openebs`.
- `k create ns openebs; helm install --namespace openebs openebs openebs/openebs; helm ls -n openebs; kubectl get pods -n openebs`.
- Once all OpenEBS pods are running (watch via `k get po -n openebs -w`), make `openebs-hostpath` the default storage class: `k patch storageclass openebs-hostpath -p '{"metadata": {"annotations":{"storageclass.kubernetes.io/is-default-class":"true"}}}'`.
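A quick check that the default annotation took effect:
```
# openebs-hostpath should be marked "(default)"
kubectl get storageclass
```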
- Install the NFS server provisioner in the `default` namespace: `helm install local-storage-nfs stable/nfs-server-provisioner --set=persistence.enabled=true,persistence.storageClass=openebs-hostpath,persistence.size=8Ti,storageClass.name=nfs-openebs-localstorage`.
- `k create ns devstats-test; k create ns devstats-prod`.
- Define `test` and `prod` contexts in `~/.kube/config`.
- Use the `test` context: `k config use-context test`.
- Install `nginx-ingress`: `helm install --namespace devstats-test nginx-ingress-test ingress-nginx/ingress-nginx --set controller.ingressClass=nginx-test,controller.scope.namespace=devstats-test,defaultBackend.enabled=false,controller.livenessProbe.initialDelaySeconds=15,controller.livenessProbe.periodSeconds=20,controller.livenessProbe.timeoutSeconds=5,controller.livenessProbe.successThreshold=1,controller.livenessProbe.failureThreshold=5,controller.readinessProbe.initialDelaySeconds=15,controller.readinessProbe.periodSeconds=20,controller.readinessProbe.timeoutSeconds=5,controller.readinessProbe.successThreshold=1,controller.readinessProbe.failureThreshold=5`.
- `k edit svc -n devstats-test nginx-ingress-test-ingress-nginx-controller`: add the annotation `metallb.universe.tf/address-pool: test` and (very optional) `spec.loadBalancerIP: 10.13.13.101`.
- Use the `prod` context: `k config use-context prod`.
- Install `nginx-ingress`: `helm install --namespace devstats-prod nginx-ingress-prod ingress-nginx/ingress-nginx --set controller.ingressClass=nginx-prod,controller.scope.namespace=devstats-prod,defaultBackend.enabled=false,controller.livenessProbe.initialDelaySeconds=15,controller.livenessProbe.periodSeconds=20,controller.livenessProbe.timeoutSeconds=5,controller.livenessProbe.successThreshold=1,controller.livenessProbe.failureThreshold=5,controller.readinessProbe.initialDelaySeconds=15,controller.readinessProbe.periodSeconds=20,controller.readinessProbe.timeoutSeconds=5,controller.readinessProbe.successThreshold=1,controller.readinessProbe.failureThreshold=5`.
- `k edit svc -n devstats-prod nginx-ingress-prod-ingress-nginx-controller`: add the annotation `metallb.universe.tf/address-pool: prod` and (very optional) `spec.loadBalancerIP: 10.13.13.102`.
- Use the `shared` context: `k config use-context shared`.
- `kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/namespace.yaml`.
- `kubectl apply -f https://raw.githubusercontent.com/metallb/metallb/v0.9.5/manifests/metallb.yaml`.
- `kubectl create secret generic -n metallb-system memberlist --from-literal=secretkey="$(openssl rand -base64 128)"`.
- Using the `master` IP for `test` and the `node-0` IP for `prod`, create the file `metallb-config.yaml` and apply it with `k apply -f metallb-config.yaml`:
```
apiVersion: v1
kind: ConfigMap
metadata:
  namespace: metallb-system
  name: config
data:
  config: |
    address-pools:
    - name: prod
      protocol: layer2
      addresses:
      - X.Y.Z.A1/32
    - name: test
      protocol: layer2
      addresses:
      - X.Y.Z.A2/32
```
- Confirm that both ingress services get their external IPs: `k -n devstats-test get svc -o wide -w nginx-ingress-test-ingress-nginx-controller; k -n devstats-prod get svc -o wide -w nginx-ingress-prod-ingress-nginx-controller`.
- `metallb` moved to quay.io instead of docker.io, so you need to change its image locations:
  - `k edit deployment -n metallb-system controller` -> `quay.io/metallb/controller:v0.9.8`.
  - `k edit daemonset -n metallb-system speaker` -> `quay.io/metallb/speaker:v0.9.8`.
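After changing the images, confirm that the MetalLB pods come back up cleanly:
```
kubectl -n metallb-system rollout status deployment/controller
kubectl -n metallb-system rollout status daemonset/speaker
```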
- You need to have a domain name pointing to your MetalLB IP before proceeding.
- Install SSL certificates using Let's Encrypt, with auto-renewal handled by `cert-manager`: see `SSL.md`.
In short:
- `kubectl create namespace cert-manager`.
- `kubectl label namespace cert-manager certmanager.k8s.io/disable-validation=true`.
- `kubectl apply -f https://github.com/jetstack/cert-manager/releases/download/v1.1.0/cert-manager.yaml`.
- `cp cert/cert-issuer.yaml.example cert/cert-issuer.yaml`.
- `vim cert/cert-issuer.yaml`.
- `kubectl apply -f cert/cert-issuer.yaml`.
- `kubectl get issuers`.
- `k get challenge -w`, wait until ready.
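While the challenge is in progress, the related cert-manager objects can be inspected too (these CRDs are installed by the manifest applied above):
```
# READY should eventually become True for the certificate
kubectl get certificate,certificaterequest,order,challenge -A
```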
- Install Go:
```
wget https://golang.org/dl/go1.15.6.linux-amd64.tar.gz
tar -C /usr/local -xzf go1.15.6.linux-amd64.tar.gz
rm go1.15.6.linux-amd64.tar.gz
echo 'export PATH=$PATH:/usr/local/go/bin' >> ~/.profile
go version
```
- `apt install -y git`.
- `go get -u github.com/cncf/devstatscode`.
- `cd go/src/github.com/cncf/`.
Test instance:
- Use the `test` context: `k config use-context test`.
- Clone the `devstats-helm` repo: `git clone https://github.com/cncf/devstats-helm`, `cd devstats-helm`.
- For each `devstats-helm/secrets/*.secret.example` file, create a corresponding `secrets/*.secret` file. Vim saves files with a trailing newline added; truncate such files via `truncate -s -1 filename`.
- `helm install devstats-test-secrets ./devstats-helm --set skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- `helm install devstats-test-backups-pv ./devstats-helm --set skipSecrets=1,skipPVs=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- `helm install devstats-test-patroni ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- Wait until the patroni pods are in the `Running` state (`k get po -n devstats-test | grep devstats-postgres-`), then: `k exec -n devstats-test -it devstats-postgres-0 -- /bin/bash`:
- `patronictl list` to see the patroni cluster state.
- Tune the Postgres configuration via the patroni REST API:
```
curl -s -XPATCH -d '{"loop_wait": "15", "postgresql": {"parameters": {"shared_buffers": "80GB", "max_parallel_workers_per_gather": "28", "max_connections": "1024", "min_wal_size": "1GB", "max_wal_size": "16GB", "effective_cache_size": "128GB", "maintenance_work_mem": "2GB", "checkpoint_completion_target": "0.9", "default_statistics_target": 1000, "effective_io_concurrency": 8, "random_page_cost": 1.1, "wal_buffers": "128MB", "max_worker_processes": "32", "max_parallel_workers": "32", "temp_file_limit": "50GB", "idle_in_transaction_session_timeout": "30min", "hot_standby": "on", "hot_standby_feedback": "on", "wal_log_hints": "on", "wal_keep_segments": "10", "wal_keep_size": "4GB", "wal_level": "replica", "max_wal_senders": "5", "max_replication_slots": "5"}, "use_pg_rewind": true}}' http://localhost:8008/config | jq .
```
- `patronictl restart --force devstats-postgres`.
- `patronictl show-config` to confirm the config.
- `k logs -n devstats-test -f devstats-postgres-N`, N=0,1,2,3.
- `helm install devstats-test-statics ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipAPI=1,skipNamespaces=1,indexStaticsFrom=0,indexStaticsTo=1`.
- `helm install devstats-test-bootstrap ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- When the bootstrap pod reaches the `Completed` state, delete it: `k delete po devstats-provision-bootstrap`.
- `helm install devstats-test-backups ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- `helm install devstats-test-api ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipNamespaces=1`.
- `helm install devstats-test-debug ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,skipPostgres=1,bootstrapPodName=debug,bootstrapCommand=sleep,bootstrapCommandArgs={360000s},bootstrapMountBackups=1`.
- `../devstats-k8s-lf/util/pod_shell.sh debug`.
- `NOBACKUP='' NOAGE=1 GIANT=wait ONLY='dbname' ./devstats-helm/backups.sh`.
- `helm delete devstats-test-debug`.
- `helm install devstats-test-proj ./devstats-helm --set skipSecrets=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,indexPVsFrom=0,indexPVsTo=1,indexProvisionsFrom=0,indexProvisionsTo=1,indexCronsFrom=0,indexCronsTo=1,indexGrafanasFrom=0,indexGrafanasTo=1,indexServicesFrom=0,indexServicesTo=1,indexAffiliationsFrom=0,indexAffiliationsTo=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,skipAddAll=1,provisionCommand='devstats-helm/restore.sh',restoreFrom='https://teststats.cncf.io/backups/'`.
- Or use `./scripts/deploy_test.sh`.
Prod instance:
- Use the `prod` context: `k config use-context prod`.
- Clone the `devstats-helm` repo: `git clone https://github.com/cncf/devstats-helm`, `cd devstats-helm`.
- `helm install devstats-prod-secrets ./devstats-helm --set namespace='devstats-prod',skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- `helm install devstats-prod-backups-pv ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- `helm install devstats-prod-patroni ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- Wait until the patroni pods are in the `Running` state (`k get po -n devstats-prod | grep devstats-postgres-`), then: `k exec -n devstats-prod -it devstats-postgres-0 -- /bin/bash`:
- `patronictl list` to see the patroni cluster state.
- Tune the Postgres configuration via the patroni REST API:
```
curl -s -XPATCH -d '{"loop_wait": "15", "postgresql": {"parameters": {"shared_buffers": "80GB", "max_parallel_workers_per_gather": "28", "max_connections": "1024", "min_wal_size": "1GB", "max_wal_size": "16GB", "effective_cache_size": "128GB", "maintenance_work_mem": "2GB", "checkpoint_completion_target": "0.9", "default_statistics_target": 1000, "effective_io_concurrency": 8, "random_page_cost": 1.1, "wal_buffers": "128MB", "max_worker_processes": "32", "max_parallel_workers": "32", "temp_file_limit": "50GB", "idle_in_transaction_session_timeout": "30min", "hot_standby": "on", "hot_standby_feedback": "on", "wal_log_hints": "on", "wal_keep_segments": "10", "wal_keep_size": "4GB", "wal_level": "replica", "max_wal_senders": "5", "max_replication_slots": "5"}, "use_pg_rewind": true}}' http://localhost:8008/config | jq .
```
- `patronictl restart --force devstats-postgres`.
- `patronictl show-config` to confirm the config.
- `k logs -n devstats-prod -f devstats-postgres-N`, N=0,1,2,3.
- `helm install devstats-prod-statics ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipAPI=1,skipNamespaces=1,indexStaticsFrom=1`.
- `helm install devstats-prod-ingress ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipStatic=1,skipAPI=1,skipNamespaces=1,skipAliases=1,indexDomainsFrom=1,ingressClass=nginx-prod,sslEnv=prod`.
- `helm install devstats-prod-bootstrap ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1`.
- When the bootstrap pod reaches the `Completed` state, delete it: `k delete po devstats-provision-bootstrap`.
- `helm install devstats-prod-backups ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,backupsCronProd='45 2 10,24 * *',backupsTestServer='',backupsProdServer='1'`.
- `helm install devstats-prod-api ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipNamespaces=1,apiImage='lukaszgryglicki/devstats-api-prod'`.
- `helm install devstats-prod-debug ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,bootstrapPodName=debug,bootstrapCommand=sleep,bootstrapCommandArgs={360000s},bootstrapMountBackups=1`.
- `../devstats-k8s-lf/util/pod_shell.sh debug`.
- `NOBACKUP='' NOAGE=1 GIANT=wait ONLY='dbname' ./devstats-helm/backups.sh`.
- `helm delete devstats-prod-debug`.
- `helm install devstats-prod-proj ./devstats-helm --set namespace='devstats-prod',skipSecrets=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,indexPVsFrom=0,indexPVsTo=1,indexProvisionsFrom=0,indexProvisionsTo=1,indexCronsFrom=0,indexCronsTo=1,indexGrafanasFrom=0,indexGrafanasTo=1,indexServicesFrom=0,indexServicesTo=1,indexAffiliationsFrom=0,indexAffiliationsTo=1,provisionImage='lukaszgryglicki/devstats-prod',provisionCommand='devstats-helm/restore.sh',restoreFrom='https://devstats.cncf.io/backups/',testServer='',prodServer='1'`.
- Or use `./scripts/deploy_prod.sh`.
See either `test/README.md` or `prod/README.md`.
You should set the namespace to `devstats-test` or `devstats-prod` first: `./switch_context.sh test`.
Please provide secret values for each file in `./secrets/*.secret.example`, saving each as the corresponding `./secrets/*.secret` file, or specify them from the command line.
Please note that `vim` automatically adds a trailing newline to text files; to remove it, run `truncate -s -1` on the saved file.
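One way to avoid the trailing-newline issue entirely is to write secret files with `printf`, which does not append a newline (a sketch; the value shown is a placeholder):
```
printf 'my-secret-value' > secrets/PG_PASS.secret
```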
List of secrets:
- `secrets/PG_ADMIN_USER.secret` or `--set pgAdminUser=...` - Postgres admin user name.
- `secrets/PG_HOST.secret` or `--set pgHost=...` - Postgres host name.
- `secrets/PG_HOST_RO.secret` or `--set pgHostRO=...` - Postgres host name (read-only).
- `secrets/PG_PASS.secret` or `--set pgPass=...` - Postgres password for the default user (`gha_admin`).
- `secrets/PG_PASS_RO.secret` or `--set pgPassRO=...` - password for the read-only user (`ro_user`).
- `secrets/PG_PASS_TEAM.secret` or `--set pgPassTeam=...` - password for the team user (also read-only) (`devstats_team`).
- `secrets/PG_PASS_REP.secret` or `--set pgPassRep=...` - password for the replication user.
- `secrets/PG_PORT.secret` or `--set pgPort=...` - Postgres port.
- `secrets/GHA2DB_GITHUB_OAUTH.secret` or `--set githubOAuth=...` - GitHub OAuth token(s) (a single value or a comma-separated list of tokens).
- `secrets/GF_SECURITY_ADMIN_USER.secret` or `--set grafanaUser=...` - Grafana admin user name.
- `secrets/GF_SECURITY_ADMIN_PASSWORD.secret` or `--set grafanaPassword=...` - Grafana admin user password.
- You can select which secret(s) should be skipped via `--set skipPGSecret=1,skipGitHubSecret=1,skipGrafanaSecret=1`.
- You can install only selected templates; see `values.yaml` for details (refer to the `skipXYZ` variables in comments), for example: `helm install --dry-run --debug --generate-name ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,runTests=1,ingressClass=nginx-test`.
- You can restrict the range of projects provisioned and/or the range of cron jobs created via: `--set indexPVsFrom=5,indexPVsTo=9,indexProvisionsFrom=5,indexProvisionsTo=9,indexCronsFrom=5,indexCronsTo=9,indexAffiliationsFrom=5,indexAffiliationsTo=9,indexGrafanasFrom=5,indexGrafanasTo=9,indexServicesFrom=5,indexServicesTo=9,indexIngressesFrom=5,indexIngressesTo=9,indexDomainsFrom=0,indexDomainsTo=2,indexStaticsFrom=0,indexStaticsTo=2`.
- You can override the number of CPUs autodetected in each pod: `--set nCPUs=1` makes each pod single-threaded.
- You can deploy a reports pod (it waits forever) so you can bash into it and generate DevStats reports: `--set reportsPod=1`. See `test/README.md` for details; search for `reportsPod`.
Please note the variables commented out in `./devstats-helm/values.yaml`. You can either uncomment them or pass their values via `--set variable=name`.
Resource types used: secret, pv, pvc, po, cronjob, deployment, svc.
To debug provisioning use:
- `helm install --debug --dry-run --generate-name ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipBootstrap=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipPostgres=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,ingressClass=nginx-test,indexProvisionsFrom=0,indexProvisionsTo=1,provisionPodName=debug,provisionCommand=sleep,provisionCommandArgs={36000s}`.
- `helm install --generate-name ./devstats-helm --set skipSecrets=1,skipPVs=1,skipBackupsPV=1,skipVacuum=1,skipBackups=1,skipProvisions=1,skipCrons=1,skipAffiliations=1,skipGrafanas=1,skipServices=1,skipIngress=1,skipStatic=1,skipAPI=1,skipNamespaces=1,skipPostgres=1,ingressClass=nginx-test,bootstrapPodName=debug,bootstrapCommand=sleep,bootstrapCommandArgs={36000s}`.
- Shell into the pod using the script from `github.com/cncf/devstats-k8s-lf`: `./util/pod_shell.sh devstats-provision-cncf`.
- `PG_USER=gha_admin db.sh psql cncf`, followed by: `select dt, proj, prog, msg from gha_logs where proj = 'cncf' order by dt desc limit 40;`.
- `kubectl delete pod devstats-provision-cncf`.
Deployment notes:
- DevStats data sources: GitHub archives and git repositories.
- Storage: persistent volumes are `ReadWriteOnce` and are private to their corresponding pods.
- Database: Postgres, run as a patroni cluster.
- Cluster: `v1.20` Kubernetes, set up manually as described in this document. Kubernetes uses CoreDNS, and docker as the CRI; docker uses containerd. Nodes are Equinix bare-metal servers (`m2.xlarge.x86`) in the `SV15` zone (Silicon Valley).
- UI: Grafana (one instance per project).
- DNS: domain names point at the MetalLB ingress IPs.
- SSL/HTTPS: we use `cert-manager` to automatically obtain and renew Let's Encrypt certificates for our domains.
- Ingress: we use `nginx-ingress` to provide HTTPS and to disable plain HTTP access (certificates are provided by `cert-manager`). Depending on the requested domain, e.g. `prometheus.teststats.cncf.io` or `envoy.devstats.cncf.io`, we're redirecting traffic to a specific Grafana service (running on port 80 inside the cluster).
- Deployment: Helm charts from this repository.
- Resource configuration: node labels decide placement - `app` nodes for Grafana and DevStats pods, `db` nodes for patroni pods.
- Secrets: kept in `*.secret` files (they're gitignored and not checked into the repo). Each such file has a `*.secret.example` counterpart as a hint for the user to create the actual `*.secret` file.
- Docker images: built from `github.com/cncf/devstats-docker-images` and pushed to Docker Hub under the `lukaszgryglicki` username:
  - `devstats-test`, `devstats-prod` - full DevStats images containing provisioning/bootstrap scripts - used for provisioning each project and for initially bootstrapping the database (different for test and prod deployments).
  - `devstats-minimal-test`, `devstats-minimal-prod` - minimal DevStats images used by the hourly-sync cron jobs (they contain only the tools needed to do an hourly sync).
  - `devstats-grafana` - Grafana image containing all tools to provision Grafana for a given project (dashboard JSONs, datasource/config templates etc.).
  - `devstats-tests` - image containing all DevStats tests (it contains a Go 1.12 runtime and a Postgres 11 database; it executes database, series, metrics, regexp and other tests, plus other checks: format, lint, imports, vet, const, usedexports, errcheck).
  - `lukaszgryglicki/devstats-patroni` - patroni image that handles database directory permissions on already-existing PVs.
- CI/CD: uses the `devstats-test` image, which has its own Postgres 11 database and Go 1.12 runtime.
- Kubernetes dashboard.

Architecture:
- `cert-manager` updates its annotations when the SSL certificate is renewed.
- patroni: REST API on port 8008; holds the full patroni configuration (`postgres-service-config`).
- `postgres-service-ro` - read-only Postgres service.
- `postgres-service` - remains constant while the underlying endpoint directs to the current patroni master node.

See `ADDING_NEW_PROJECTS.md` for information about how to add more projects.
If you get the `Unable to connect to the server: x509: certificate has expired or is not yet valid` error, your kubectl certificates have expired.
- You can check their expiration dates via `kubeadm certs check-expiration`.
- You can renew them via `kubeadm certs renew all`.
- Then: `cp ~/.kube/config ~/.kube/config.2021-12-15; cp /etc/kubernetes/admin.conf ~/.kube/config; cd ~/.kube/`. You then need to merge the two files - especially if your config had special contexts - updating only the cert-related data.
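One way to do the merge is kubectl's own flattening (a sketch; the backup file name follows the example above - the first file listed wins on conflicting entries, so the fresh certificates are kept while the old custom contexts survive):
```
# merge the fresh admin.conf (now ~/.kube/config) with the old config
KUBECONFIG=~/.kube/config:~/.kube/config.2021-12-15 kubectl config view --flatten > /tmp/config.merged
mv /tmp/config.merged ~/.kube/config
```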
- For all nodes: `sftp user@node-name`, then `cd .kube`, `rm config`, `mput config`, `mput config.2021-12-15`.
- Restart all nodes one after another.
This should ideally only be done right after the initial installation; otherwise you need to stop all cron jobs and wait for other tasks to finish, and it will render the cluster unusable for a while.
- Stop the `containerd` service: `service containerd stop`.
- Assuming that you have an NVMe volume mounted on `/data`, do the following:
```
mkdir -p /data/run/containerd
mkdir -p /data/var/lib/containerd
mv /run/containerd /run/containerd.old
ln -s /data/run/containerd /run/containerd
mv /var/lib/containerd/ /var/lib/containerd.old
ln -s /data/var/lib/containerd/ /var/lib/containerd
```
- `service containerd restart`.
Eventually also `kubelet`:
- Stop the `kubelet` service: `service kubelet stop`.
```
mkdir -p /data/var/lib/kubelet
mv /var/lib/kubelet /var/lib/kubelet.old
ln -s /data/var/lib/kubelet /var/lib/kubelet
```
- `service kubelet restart`.
Alternatively (instead of symlinking), assuming that you have an NVMe volume mounted on `/data`, do the following:
```
mkdir -p /data/run/containerd
mkdir -p /data/var/lib/containerd
```
Then edit `containerd`'s config file - `vim /etc/containerd/config.toml` - and set:
```
version = 2
root = "/data/var/lib/containerd"
state = "/data/run/containerd"
```
Then `service containerd restart`.