zking2000 / NotePad

1 stars 0 forks source link

GKE BACKUP & RESTORE #17

Open 44084750 opened 1 month ago

44084750 commented 1 month ago

gcloud CLI

# Enable the gkebackup.googleapis.com service, which the backup-restore
# commands below talk to. No --project flag is given, so this presumably
# targets the project currently set in the gcloud configuration.
gcloud services enable gkebackup.googleapis.com

# Create Backup Plan
# Creates the backup plan "stephen-backup-plan" for the cluster
# "private-cluster" in asia-east1. The plan covers all namespaces, includes
# volume data and Secrets, and retains each backup for 30 days.

gcloud beta container backup-restore backup-plans create stephen-backup-plan \
  --project=causal-hour-418204 \
  --location=asia-east1 \
  --cluster=projects/causal-hour-418204/locations/asia-east1/clusters/private-cluster \
  --include-volume-data \
  --include-secrets \
  --backup-retain-days=30 \
  --all-namespaces \
  --http-timeout=3600

# output

Create request issued for: [stephen-backup-plan]
Waiting for operation [projects/causal-hour-418204/locations/asia-east1/operations/operation-1717225758238-619cec751a875-8131b871-a49cfc3d] to complete...done.
Created backup plan [stephen-backup-plan].
# Create Backup
# Starts a backup named "stephen-backup" under stephen-backup-plan. --async
# returns right away instead of waiting for the operation to complete.

gcloud beta container backup-restore backups create stephen-backup \
  --project=causal-hour-418204 \
  --location=asia-east1 \
  --backup-plan=stephen-backup-plan \
  --async
# List backups. NOTE(review): no --project/--location/--backup-plan flags are
# passed here, so this presumably relies on gcloud configuration defaults —
# confirm before copying.
gcloud beta container backup-restore backups list

# Show the details of the backup, addressed by its full resource name.
gcloud beta container backup-restore backups describe projects/causal-hour-418204/locations/asia-east1/backupPlans/stephen-backup-plan/backups/stephen-backup

# Output
allNamespaces: true
clusterMetadata:
  backupCrdVersions:
    backupjobs.gkebackup.gke.io: v1
    protectedapplicationgroups.gkebackup.gke.io: v1
    protectedapplications.gkebackup.gke.io: v1
    restorejobs.gkebackup.gke.io: v1
  cluster: projects/causal-hour-418204/locations/asia-east1/clusters/private-cluster
  gkeVersion: v1.28.9-gke.1000000
  k8sVersion: '1.28'
completeTime: '2024-06-01T07:14:45.279061081Z'
configBackupSizeBytes: '505730'
containsSecrets: true
containsVolumeData: true
createTime: '2024-06-01T07:14:33.992646731Z'
deleteLockExpireTime: '2024-06-01T07:14:33.987276321Z'
etag: '8'
manual: true
name: projects/causal-hour-418204/locations/asia-east1/backupPlans/stephen-backup-plan/backups/stephen-backup
resourceCount: 407
retainDays: 30
retainExpireTime: '2024-07-01T07:58:33.987276321Z'
sizeBytes: '505730'
state: SUCCEEDED
uid: 46a124b2-14c9-45a4-a606-0466155dc70d
updateTime: '2024-06-01T07:14:46.389730812Z'
# Create Restore Plan
# Creates the restore plan "stephen-restore-plan". It restores backups of
# stephen-backup-plan into the same cluster the backup plan protects, covers
# all namespaces, and handles namespaced resources with the
# merge-replace-on-conflict restore mode.

gcloud beta container backup-restore restore-plans create stephen-restore-plan \
  --project=causal-hour-418204 \
  --location=asia-east1 \
  --cluster=projects/causal-hour-418204/locations/asia-east1/clusters/private-cluster \
  --backup-plan=projects/causal-hour-418204/locations/asia-east1/backupPlans/stephen-backup-plan \
  --description="Restore plan for Stephen's backup" \
  --all-namespaces \
  --namespaced-resource-restore-mode=merge-replace-on-conflict

# Output

Create request issued for: [stephen-restore-plan]
Waiting for operation [projects/causal-hour-418204/locations/asia-east1/operations/operation-1717228770003-619cf7ad589ee-a70b79d7-af271d30] to complete...done.
Created restore plan [stephen-restore-plan].

# Create Restore
# Starts the restore "stephen-restore-nginx" through stephen-restore-plan;
# --async returns without waiting for the operation to finish.
# NOTE(review): --backup points at "stephen-backup-nginx", while the backup
# created earlier in these notes is named "stephen-backup" — presumably this
# comes from a separate run; confirm the backup name before use.

gcloud beta container backup-restore restores create stephen-restore-nginx \
  --project=causal-hour-418204 \
  --location=asia-east1 \
  --restore-plan=stephen-restore-plan \
  --backup=projects/causal-hour-418204/locations/asia-east1/backupPlans/stephen-backup-plan/backups/stephen-backup-nginx \
  --async

Tips: 在 GKE 中,备份和还原是针对集群中的资源的状态和配置的。当你执行还原操作时,它会还原备份时的整个集群状态,包括所有的资源(如 Deployment、Pod 等)。然而,还原操作通常不会删除集群中已经存在的资源,而是会尝试将备份的状态与当前集群状态合并,可能会导致一些资源重复存在或冲突。

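新建的 Deployment 在还原操作后仍然存在,这是因为还原操作通常不会删除现有的资源:本例的 restore plan 使用 `merge-replace-on-conflict` 模式,只会用备份中的版本替换发生冲突的资源,不会删除备份中不存在的资源。如果你希望还原操作删除特定的资源,你可以在还原之前手动删除这些资源,或者在 restore plan 中改用会先删除再还原的模式(例如 `--namespaced-resource-restore-mode=delete-and-restore`)。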

需要注意的是,并非所有资源都支持在还原操作中自动删除。有些资源可能需要手动处理。因此,在进行还原操作时,请确保仔细查看文档,并了解每种资源的还原行为。

我的场景:

  1. 创建cluster -> 创建 backup plan -> 创建backup -> 创建deployment nginx -> 创建restore plan -> 创建restore : 新创建的deployment仍然存在
  2. 创建cluster -> 创建deployment nginx -> 创建 backup plan -> 创建backup -> 删除deployment nginx -> 创建restore plan -> 创建restore : 新创建的deployment正常恢复

Terraform

# google-beta provider configuration: credentials are read from the local
# file ./sa.json; project and region come from the variables declared in
# this configuration.
provider "google-beta" {
  credentials = file("./sa.json")
  project     = var.PROJECT_ID
  region      = var.REGION
}

# Backup plan for the GKE cluster: scheduled backups of all namespaces,
# including volume data and Secrets.
resource "google_gke_backup_backup_plan" "terraform_backup_plan" {
  name        = "terraform-backup-plan"
  description = "terraform backup plan for GKE cluster"
  project     = var.PROJECT_ID
  location    = var.REGION
  cluster     = "projects/${var.PROJECT_ID}/locations/${var.REGION}/clusters/${var.CLUSTER_NAME}"

  # What every backup taken by this plan contains.
  backup_config {
    all_namespaces      = true
    include_secrets     = true
    include_volume_data = true
  }

  # Cron expression: minute 0, hour 2, every day.
  backup_schedule {
    cron_schedule = "0 2 * * *"
  }

  # Backups are retained for 180 days; the delete-lock period is 0 days.
  retention_policy {
    backup_retain_days      = 180
    backup_delete_lock_days = 0
  }
}

# Restore plan tied to terraform_backup_plan; it targets the cluster named by
# the same PROJECT_ID / REGION / CLUSTER_NAME variables.
resource "google_gke_backup_restore_plan" "terraform_restore_plan" {
  name        = "terraform-restore-plan"
  description = "terraform restore plan for GKE cluster"
  project     = var.PROJECT_ID
  location    = var.REGION
  cluster     = "projects/${var.PROJECT_ID}/locations/${var.REGION}/clusters/${var.CLUSTER_NAME}"
  backup_plan = google_gke_backup_backup_plan.terraform_backup_plan.id

  restore_config {
    # Namespaced resources: all namespaces, FAIL_ON_CONFLICT restore mode.
    all_namespaces                   = true
    namespaced_resource_restore_mode = "FAIL_ON_CONFLICT"

    # Cluster-scoped resources: every group/kind is in scope, and the
    # conflict policy is USE_EXISTING_VERSION.
    cluster_resource_conflict_policy = "USE_EXISTING_VERSION"
    cluster_resource_restore_scope {
      all_group_kinds = true
    }

    # Volume contents are restored from the backup.
    volume_data_restore_policy = "RESTORE_VOLUME_DATA_FROM_BACKUP"
  }
}

# Project ID used by the provider and by both plan resources.
variable "PROJECT_ID" {
  type    = string
  default = "causal-hour-418204"
}

# Region used as the provider region and as the location of both plans.
variable "REGION" {
  type    = string
  default = "asia-east1"
}

# Name of the cluster, used to build the cluster resource path of both plans.
variable "CLUSTER_NAME" {
  type    = string
  default = "private-cluster"
}
44084750 commented 1 month ago

Delete Backups BackupPlan Restores RestorePlan

#!/bin/bash

# Target project and location. Both can be overridden positionally
# ($1 = project ID, $2 = location); otherwise the defaults below apply.
PROJECT_ID="${1:-causal-hour-418204}"
LOCATION="${2:-asia-east1}"

#######################################
# Delete every backup that belongs to one backup plan, skipping backups whose
# delete lock has not expired yet.
# Globals:   PROJECT_ID (read), LOCATION (read)
# Arguments: $1 - backup plan name, as printed by `backup-plans list`
# Outputs:   progress messages on stdout; listing errors on stderr
# Returns:   1 if the backups could not be listed, 0 otherwise
#######################################
delete_backups() {
  local backup_plan=$1
  local backups backup lock_expiry now

  if ! backups=$(gcloud beta container backup-restore backups list \
      --project="${PROJECT_ID}" --location="${LOCATION}" \
      --backup-plan="${backup_plan}" --format="value(name)"); then
    echo "Failed to list backups in backup plan ${backup_plan}" >&2
    return 1
  fi

  # Read the names on fd 3 so commands inside the loop keep their own stdin.
  while IFS= read -r -u 3 backup; do
    [[ -n "${backup}" ]] || continue

    # deleteLockDays is only the configured lock length and stays set after
    # the lock has expired, so compare deleteLockExpireTime with "now".
    # The value is expected to be an RFC 3339 UTC timestamp such as
    # 2024-06-01T07:14:33.987276321Z; the first 19 characters
    # (YYYY-MM-DDTHH:MM:SS) compare correctly as plain strings.
    lock_expiry=$(gcloud beta container backup-restore backups describe "${backup}" \
      --project="${PROJECT_ID}" --location="${LOCATION}" \
      --backup-plan="${backup_plan}" --format="value(deleteLockExpireTime)")
    now=$(date -u +%Y-%m-%dT%H:%M:%S)
    if [[ -n "${lock_expiry}" && "${lock_expiry:0:19}" > "${now}" ]]; then
      echo "Skipping backup ${backup} as it is locked for deletion until ${lock_expiry}"
      continue
    fi

    echo "Deleting backup: ${backup}"
    if gcloud beta container backup-restore backups delete "${backup}" \
        --project="${PROJECT_ID}" --location="${LOCATION}" \
        --backup-plan="${backup_plan}" --quiet; then
      echo "Backup ${backup} deleted successfully"
    else
      echo "Failed to delete backup ${backup}"
    fi
  done 3<<< "${backups}"

  return 0
}

#######################################
# Delete every backup plan in the project/location, emptying each plan of its
# backups (via delete_backups) before deleting the plan itself.
# Globals:   PROJECT_ID (read), LOCATION (read)
# Arguments: none
# Outputs:   progress messages on stdout; listing errors on stderr
# Returns:   1 if the backup plans could not be listed, 0 otherwise
#######################################
delete_backup_plans() {
  local backup_plans backup_plan

  if ! backup_plans=$(gcloud beta container backup-restore backup-plans list \
      --project="${PROJECT_ID}" --location="${LOCATION}" \
      --format="value(name)"); then
    echo "Failed to list backup plans" >&2
    return 1
  fi

  # Read the names on fd 3 so commands inside the loop keep their own stdin.
  while IFS= read -r -u 3 backup_plan; do
    [[ -n "${backup_plan}" ]] || continue

    echo "Deleting backups in backup plan: ${backup_plan}"
    delete_backups "${backup_plan}"

    echo "Deleting backup plan: ${backup_plan}"
    if gcloud beta container backup-restore backup-plans delete "${backup_plan}" \
        --project="${PROJECT_ID}" --location="${LOCATION}" --quiet; then
      echo "Backup plan ${backup_plan} deleted successfully"
    else
      echo "Failed to delete backup plan ${backup_plan}"
    fi
  done 3<<< "${backup_plans}"

  return 0
}

#######################################
# Delete every restore that belongs to one restore plan.
# Globals:   PROJECT_ID (read), LOCATION (read)
# Arguments: $1 - restore plan name, as printed by `restore-plans list`
# Outputs:   progress messages on stdout; listing errors on stderr
# Returns:   1 if the restores could not be listed, 0 otherwise
#######################################
delete_restores() {
  local restore_plan=$1
  local restores restore

  if ! restores=$(gcloud beta container backup-restore restores list \
      --project="${PROJECT_ID}" --location="${LOCATION}" \
      --restore-plan="${restore_plan}" --format="value(name)"); then
    echo "Failed to list restores in restore plan ${restore_plan}" >&2
    return 1
  fi

  # Read the names on fd 3 so commands inside the loop keep their own stdin.
  while IFS= read -r -u 3 restore; do
    [[ -n "${restore}" ]] || continue

    echo "Deleting restore: ${restore}"
    if gcloud beta container backup-restore restores delete "${restore}" \
        --project="${PROJECT_ID}" --location="${LOCATION}" \
        --restore-plan="${restore_plan}" --quiet; then
      echo "Restore ${restore} deleted successfully"
    else
      echo "Failed to delete restore ${restore}"
    fi
  done 3<<< "${restores}"

  return 0
}

#######################################
# Delete every restore plan in the project/location, removing each plan's
# restores (via delete_restores) before deleting the plan itself.
# Globals:   PROJECT_ID (read), LOCATION (read)
# Arguments: none
# Outputs:   progress messages on stdout; listing errors on stderr
# Returns:   1 if the restore plans could not be listed, 0 otherwise
#######################################
delete_restore_plans() {
  local restore_plans restore_plan

  if ! restore_plans=$(gcloud beta container backup-restore restore-plans list \
      --project="${PROJECT_ID}" --location="${LOCATION}" \
      --format="value(name)"); then
    echo "Failed to list restore plans" >&2
    return 1
  fi

  # Read the names on fd 3 so commands inside the loop keep their own stdin.
  while IFS= read -r -u 3 restore_plan; do
    [[ -n "${restore_plan}" ]] || continue

    echo "Deleting restores in restore plan: ${restore_plan}"
    delete_restores "${restore_plan}"

    echo "Deleting restore plan: ${restore_plan}"
    if gcloud beta container backup-restore restore-plans delete "${restore_plan}" \
        --project="${PROJECT_ID}" --location="${LOCATION}" --quiet; then
      echo "Restore plan ${restore_plan} deleted successfully"
    else
      echo "Failed to delete restore plan ${restore_plan}"
    fi
  done 3<<< "${restore_plans}"

  return 0
}

# Everything below is destructive, so require an explicit single-key
# confirmation; any answer other than y/Y aborts with exit status 1.
read -p "This script will delete all backups, backup plans, restores, and restore plans in the project ${PROJECT_ID} and location ${LOCATION}. Are you sure you want to proceed? (y/n) " -n 1 -r
echo
if [[ ! "${REPLY}" =~ ^[Yy]$ ]]; then
  echo "Deletion aborted"
  exit 1
fi

# Execute the deletion.
# The prompt and the message below promise that restores and restore plans
# are removed as well, so delete_restore_plans must actually run. It runs
# first because restore plans reference a backup plan.
echo "Starting deletion of all backups, backup plans, restores, and restore plans"
delete_restore_plans
delete_backup_plans
echo "Deletion complete"