Closed alexey-milovidov closed 6 months ago
$ cat adsblol.sh
#!/bin/bash
# Import one day of adsb.lol history into ClickHouse.
#
# Takes the date stored in adsblol/last, adds one day, downloads that day's
# release archive (a single .tar or split parts .tar.aa .. .tar.az), copies
# every downloaded file to S3, extracts the archive, runs
# ../adsblol-process-file.sh on each trace JSON file, appends the staging
# table to planes_mercator, and finally records the new date in adsblol/last
# (the previous value is kept in adsblol/prev).
#
# Inputs:
#   ./config       sourced first; presumably defines the CLICKHOUSE_PLANES_*
#                  values and CLICKHOUSE_PLANES_PARAMS (TODO confirm).
#   adsblol/last   date of the most recently loaded day.
# External tools: clickhouse-local, clickhouse-client, wget, aws, tar, find, xargs.
source config

# Make the find | xargs pipeline fail when `find` fails; otherwise a missing
# traces directory would be ignored and `last` would still be advanced.
set -o pipefail

mkdir -p adsblol
# Stop if the working directory cannot be entered, so that the lock and all
# downloads never end up in the wrong place.
pushd adsblol || exit

# The directory `lock` is the mutex: mkdir fails when it already exists,
# which makes a second concurrent run exit here.
mkdir lock || exit
trap 'rmdir lock' EXIT

# Download the file. Process and upload them to S3. Remove the file and update the last date.
# Each date computation aborts the run on failure instead of continuing with
# an empty value in the release name and URL.
NEXT=$(clickhouse-local --query "SELECT '$(cat last)'::Date + 1") || exit
YEAR=$(clickhouse-local --query "SELECT toYear('$NEXT'::Date)") || exit
DATE_FORMATTED=$(clickhouse-local --query "SELECT formatDateTime('$NEXT'::Date, '%Y.%m.%d')") || exit
PATCH=0
NAME="v${DATE_FORMATTED}-planes-readsb-prod-${PATCH}"

# Exported for child processes (../adsblol-process-file.sh is started below).
export CLICKHOUSE_PLANES_HOST
export CLICKHOUSE_PLANES_USER
export CLICKHOUSE_PLANES_PASSWORD
export TABLE=default.planes_adsblol_loading

# Try the unsplit archive name and every possible split suffix. A failed
# download is not an error here: it only skips the S3 copy for that name.
for SUFFIX in '' .a{a..z}
do
    URL="https://github.com/adsblol/globe_history_${YEAR}/releases/download/${NAME}/${NAME}.tar${SUFFIX}"
    wget --no-verbose --continue "$URL" &&
        aws s3 cp --no-progress "${NAME}.tar${SUFFIX}" "s3://clickhouse-public-datasets/adsblol/original/${NAME}.tar${SUFFIX}"
done

# Everything below is one && chain: the first failing step stops the run and
# leaves `last` untouched.
# CLICKHOUSE_PLANES_PARAMS is intentionally unquoted so that it splits into
# separate command-line options.
# shellcheck disable=SC2086
cat "${NAME}.tar"* > "${NAME}.all.tar" &&
    mkdir -p "$NAME" &&
    (cd "$NAME" && tar xf "../${NAME}.all.tar") &&
    rm "${NAME}.all.tar" "${NAME}.tar"* &&
    clickhouse-client ${CLICKHOUSE_PLANES_PARAMS} --query "CREATE OR REPLACE TABLE ${TABLE} AS planes_mercator" &&
    find *readsb*/traces -name '*.json' -print0 |
        xargs -0 -P 100 -n 1 ../adsblol-process-file.sh &&
    clickhouse-client ${CLICKHOUSE_PLANES_PARAMS} --query "INSERT INTO planes_mercator SELECT * FROM ${TABLE}" &&
    rm -rf *readsb* &&
    mv last prev &&
    printf '%s\n' "$NEXT" > last
It should catch up in a few hours...
It finished loading, and I also reloaded the problematic day 2024-03-22 manually.
Release assets are now split over multiple files, e.g. `v2024.04.16-planes-readsb-staging-0.tar.aa`, and we need to support them in the import scripts.