netdata / netdata

Architected for speed. Automated for easy. Monitoring and troubleshooting, transformed!
https://www.netdata.cloud
GNU General Public License v3.0
70.39k stars 5.85k forks source link

[Feat]: Netdata hot backup #18201

Open ktsaou opened 1 month ago

ktsaou commented 1 month ago

Problem

Netdata files in /var/cache/netdata support hot backup, that is, a backup taken while the agent keeps running, without restarting it.

Description

This script can create such a hot backup on any running netdata:

#!/usr/bin/env bash
#
# Creates a hot backup of a running Netdata agent.
#
# 'env' is looked up in /usr/bin because that location is available on
# practically every system, while /bin/env is missing where /bin is not
# merged into /usr.

# name this script was invoked with, shown in the usage text
me="${0}"

# abort as soon as any unchecked command fails
set -e

# include known custom netdata directories in the path
for x in /opt/netdata/usr/sbin /usr/local/sbin "${HOME}/netdata/usr/sbin"
do
    # only directories that really hold an executable netdata are appended
    if [ -x "${x}/netdata" ]
    then
        PATH="${PATH}:${x}"
    fi
done

# Ask the netdata binary for one of its configured values.
#   $1 - configuration section
#   $2 - key inside that section
#   $3 - third argument passed to 'netdata -W get'; the callers in this
#        script pass a directory path here (presumably the default value)
# Whatever 'netdata -W get' prints on stdout is passed through, its stderr
# is discarded, and its exit status becomes the function's exit status.
netdata_get_config() {
    local cfg_section="${1}" cfg_key="${2}" cfg_fallback="${3}"

    netdata -W get "${cfg_section}" "${cfg_key}" "${cfg_fallback}" 2>/dev/null
}

# Resolve the config, cache and lib directories of the installed Netdata.
#
# A failing command substitution in a plain assignment aborts the script
# immediately when 'set -e' is active, and netdata_get_config discards
# stderr, so the user would see no message at all. Each lookup therefore
# falls back to an empty value on failure, which is reported just below.

config_dir="$(netdata_get_config 'directories' 'config' '/etc/netdata')" || config_dir=""
if [ -z "${config_dir}" ] || [ ! -d "${config_dir}" ]
then
    echo >&2 "Cannot find Netdata config directory '${config_dir}'"
    exit 1
else
    echo >&2 "Netdata config directory in '${config_dir}'"
fi

cache_dir="$(netdata_get_config 'directories' 'cache' '/var/cache/netdata')" || cache_dir=""
if [ -z "${cache_dir}" ] || [ ! -d "${cache_dir}" ]
then
    echo >&2 "Cannot find Netdata cache directory '${cache_dir}'"
    exit 1
else
    echo >&2 "Netdata cache directory in '${cache_dir}'"
fi

lib_dir="$(netdata_get_config 'directories' 'lib' '/var/lib/netdata')" || lib_dir=""
if [ -z "${lib_dir}" ] || [ ! -d "${lib_dir}" ]
then
    echo >&2 "Cannot find Netdata lib directory '${lib_dir}'"
    exit 1
else
    echo >&2 "Netdata lib directory in '${lib_dir}'"
fi

echo >&2 ""

# Option defaults: 1 means the directory is backed up, 0 means it is skipped.
backup_mode="rsync"
backup_cache=1
backup_lib=0
backup_config=0
backup_dst=

# Print the help text on stderr, including the currently selected settings,
# and terminate the script with exit status 1.
usage() {
    local config_state lib_state cache_state

    # translate the 0/1 switches into the words shown in the help text
    if [ ${backup_config} -eq 1 ]; then config_state="enabled"; else config_state="disabled"; fi
    if [ ${backup_lib} -eq 1 ]; then lib_state="enabled"; else lib_state="disabled"; fi
    if [ ${backup_cache} -eq 1 ]; then cache_state="enabled"; else cache_state="disabled"; fi

    cat 1>&2 <<USAGE_TEXT

Usage: ${me} [--[no-]config] [--[no-]lib] [--[no-]cache] [--all] [--tar|--rsync] destination

 --config       enable backup of '${config_dir}'
 --no-config    disable backup of '${config_dir}'
                setting: ${config_state}

 --lib          enable backup of '${lib_dir}'
 --no-lib       disable backup of '${lib_dir}'
                setting: ${lib_state}

 --cache        enable backup of '${cache_dir}'
 --no-cache     disable backup of '${cache_dir}'
                setting: ${cache_state}

 --all          enable everything above

 --tar
 --rsync
                Use either tar or rsync for the backup.
                setting: --${backup_mode}

 destination    whatever tar or rsync accepts as destination

USAGE_TEXT

    exit 1
}

# Parse the command line.
#
# The loop is driven by the number of remaining arguments, so an empty
# argument (for example an unset variable expanded inside quotes) no longer
# ends parsing early and hides everything that follows it.
while [ $# -gt 0 ]
do
    case "${1}" in
        --help|-h|help)
            usage
            exit 1
            ;;

        --config)
            backup_config=1
            ;;

        --no-config)
            backup_config=0
            ;;

        --lib)
            backup_lib=1
            ;;

        --no-lib)
            backup_lib=0
            ;;

        --cache)
            backup_cache=1
            ;;

        --no-cache)
            backup_cache=0
            ;;

        --all)
            backup_lib=1
            backup_cache=1
            backup_config=1
            ;;

        --tar)
            backup_mode="tar"
            ;;

        --rsync)
            backup_mode="rsync"
            ;;

        "")
            # empty arguments carry no information; skip them
            ;;

        -?*)
            # a mistyped option must not be taken as the destination;
            # a lone '-' is still accepted as a destination below
            echo >&2 "Unknown option '${1}'"
            usage
            exit 1
            ;;

        *)
            # anything else is the destination, which may be given only once
            if [ -n "${backup_dst}" ]
            then
                echo >&2 "Only one backup file must be given, first='${backup_dst}', second='${1}'"
                usage
                exit 1
            fi
            backup_dst="${1}"
            ;;
    esac
    shift 1
done

# a destination is mandatory
if [ -z "${backup_dst}" ]
then
    echo >&2 "No destination given."
    usage
    exit 1
fi

# finished is set to 1 once the backup has completed successfully
finished=0

# staging directory that mirrors the layout to be backed up
temp_dir="$(mktemp -d)" || {
    echo >&2 "Cannot create a temporary directory."
    exit 1
}

# Remove the staging directory. When the backup did not finish, its contents
# are listed first (on stderr, like every other diagnostic of this script)
# to help troubleshooting.
#
# Listing is best effort: 'tree' may be missing or may fail, and with
# 'set -e' active that would abort this function before the directory is
# removed, so the listing can never prevent the removal.
cleanup() {
    if [ "${finished}" -ne 1 ] && command -v tree >/dev/null 2>&1
    then
        tree "${temp_dir}" >&2 || true
    fi

    if [ -n "${temp_dir}" ]
    then
        rm -rf -- "${temp_dir}"
    fi

    return 0
}

# cleanup runs exactly once, when the shell exits
trap cleanup EXIT

# A signal handler that returns lets the script carry on after the signal,
# so each signal is turned into an exit (128 + signal number), which in turn
# triggers the EXIT trap above.
trap 'exit 129' HUP
trap 'exit 130' INT
trap 'exit 131' QUIT
trap 'exit 143' TERM

# Stage the cache directory: plain files are copied, SQLite databases are
# exported with sqlite3, and dbengine directories are only linked.
if [ "${backup_cache}" -eq 1 ]
then
    # The databases are exported with the sqlite3 command line tool, which
    # may not be installed. Check before copying anything, and only when
    # there is a database to export, so the failure is early and explicit.
    for db in "${cache_dir}"/*.db; do
        if [ -f "${db}" ] && ! command -v sqlite3 >/dev/null 2>&1
        then
            echo >&2 "Cannot export '${db}': the 'sqlite3' command is not installed."
            exit 1
        fi
    done

    # mirror the cache path below the staging directory, collapsing '//'
    dir="$(echo "${temp_dir}/${cache_dir}" | sed 's|//|/|g')"
    mkdir -p "${dir}"
    chown --reference="${cache_dir}" "${dir}"
    chmod --reference="${cache_dir}" "${dir}"

    echo >&2 "Exporting '${cache_dir}'"
    # databases, their -shm/-wal companions, sockets and dbengine
    # directories are excluded here; they are handled separately below
    rsync -apL \
        --exclude "dbengine*" \
        --exclude "dbengine*/*" \
        --exclude "*.sock" \
        --exclude "*.db" \
        --exclude "*.db-shm" \
        --exclude "*.db-wal" \
        "${cache_dir}/" "${dir}/"

    # Back up each .db file in the temporary directory
    for db in "${cache_dir}"/*.db; do
        # the glob stays literal when nothing matches, hence the -f test
        if [ -f "${db}" ]
        then
            echo >&2 " - Exporting up SQLite3 database '${db}'..."
            dst="${dir}/$(basename "${db}")"
            # NOTE(review): the destination is embedded in single quotes, so
            # a path containing a single quote would break this command
            sqlite3 "${db}" ".backup '${dst}'"
            chown --reference="${db}" "${dst}"
            chmod --reference="${db}" "${dst}"
        fi
    done

    # Create symlinks for each dbengine directory
    for d in "${cache_dir}"/dbengine*; do
        if [ -d "${d}" ]
        then
            echo >&2 " - Linking DBENGINE directory ${d}..."
            ln -s "${d}" "${dir}/$(basename "${d}")"
        fi
    done
fi

# Stage a copy of the lib directory below the staging directory, leaving out
# the lock directory and the files named in the exclude list.
if [ ${backup_lib} -eq 1 ]; then
    echo >&2 "Exporting '${lib_dir}'"

    # mirror the lib path below the staging directory, collapsing '//'
    dir="$(sed 's|//|/|g' <<<"${temp_dir}/${lib_dir}")"
    mkdir -p "${dir}"

    rsync -apL \
        --exclude "lock" --exclude "lock/*" \
        --exclude "registry/netdata.public.unique.id" \
        --exclude "netdata.tarball.checksum" \
        --exclude "netdata_random_session_id" \
        "${lib_dir}/" \
        "${dir}/"
fi

# Stage a copy of the config directory below the staging directory, leaving
# out the 'orig' directory.
if [ ${backup_config} -eq 1 ]; then
    echo >&2 "Exporting '${config_dir}'"

    # mirror the config path below the staging directory, collapsing '//'
    dir="$(sed 's|//|/|g' <<<"${temp_dir}/${config_dir}")"
    mkdir -p "${dir}"

    rsync -apL \
        --exclude "orig" --exclude "orig/*" \
        "${config_dir}/" \
        "${dir}/"
fi

echo >&2 ""

# Write the staged tree to the destination with the selected tool.
#
# The staged tree links to live dbengine directories, so files may change
# or disappear while they are being read. GNU tar reports changed files
# with exit status 1 and rsync reports vanished source files with exit
# status 24. Both are expected during a hot backup and are downgraded to
# warnings; any other non-zero status still aborts the script.
if [ "${backup_mode}" = "tar" ]
then
    echo >&2 "Creating tar compressed archive '${backup_dst}'..."
    rc=0
    tar -czhf "${backup_dst}" --ignore-failed-read -C "${temp_dir}" . || rc=$?
    if [ "${rc}" -eq 1 ]
    then
        echo >&2 "WARNING: some files changed while tar was reading them (expected on a running agent)."
    elif [ "${rc}" -ne 0 ]
    then
        echo >&2 "tar failed with exit code ${rc}."
        exit "${rc}"
    fi

elif [ "${backup_mode}" = "rsync" ]
then
    echo >&2 "Synchronizing with rsync to '${backup_dst}'"
    rc=0
    rsync --archive --verbose --progress --copy-links --specials --perms --partial \
        "${temp_dir}/" "${backup_dst}/" || rc=$?
    if [ "${rc}" -eq 24 ]
    then
        echo >&2 "WARNING: some source files vanished during the transfer (expected on a running agent)."
    elif [ "${rc}" -ne 0 ]
    then
        echo >&2 "rsync failed with exit code ${rc}."
        exit "${rc}"
    fi

else
    echo >&2 "Unknown mode '${backup_mode}'."
    usage
    exit 1
fi

echo >&2 "Backup completed successfully to '${backup_dst}'."

# tells the exit handler that no troubleshooting listing is needed
finished=1

Importance

must have

Value proposition

  1. Netdata does support hot backup of the files
  2. Such a script is needed to replicate a parent

Proposed implementation

No response

stelfrag commented 1 month ago

The sqlite3 CLI may not be installed. Either it can be installed, or the agent can provide a command via netdatacli (if the agent is running) or a command-line switch to export a database.