Closed lukemarsden closed 5 years ago
Here's the prototype of the simpler version, which uses just a single temporary snapshot (zfs-fastdiff.sh):
#!/bin/bash
#
# zfs-fastdiff.sh - prototype "fast diff" for a dotmesh filesystem.
#
# Takes a temporary snapshot of pool/dmfs/<dataset>, mounts it next to the
# last entry that `zfs list` reports for that dataset, and diffs the output
# of `find .` run in each mount.
#
# Usage:   zfs-fastdiff.sh <dataset>
# Output:  the `diff` of the two path listings on stdout; the `set -x` trace
#          goes to stderr.
# Exit:    non-zero if no dataset is given or any zfs/mount step fails.
#
# NOTE: uses the fixed mount points /1 and /2 (which must already exist), so
# only one instance can run at a time.
set -xe

if [ -z "$1" ]; then
  echo "Please specify zfs dataset as first arg" >&2
  exit 1
fi

dataset="pool/dmfs/$1"

# Best effort: release mounts left behind by an earlier, interrupted run.
umount /1 || true
umount /2 || true

# Last line printed by `zfs list` for the dataset, ignoring temporary diff
# snapshots. NOTE(review): this is presumably the most recent snapshot (or
# the filesystem itself when there are none); that relies on the default
# ordering of `zfs list` - confirm.
latest=$(zfs list -t snapshot,filesystem -r "$dataset" \
  | grep -v dotmesh-tmp-diff \
  | grep -v zfs-diff \
  | tail -n 1 \
  | awk '{print $1}')

# The pipeline's status is awk's, so a failed `zfs list` is not caught by
# `set -e`; detect it through the empty result instead.
if [ -z "$latest" ]; then
  echo "Could not find anything to diff against for $dataset" >&2
  exit 1
fi

tmp="${dataset}@dotmesh-tmp-diff"

# Best effort: drop a stale temporary snapshot before taking a fresh one.
zfs destroy "$tmp" || true
zfs snapshot "$tmp"

mount -t zfs "$latest" /1
mount -t zfs "$tmp" /2

# Keep the listings in a private scratch directory instead of leaving files
# named "1" and "2" behind in the caller's working directory.
workdir=$(mktemp -d)
trap 'rm -rf -- "$workdir"' EXIT

(cd /1 && find .) > "$workdir/1"
(cd /2 && find .) > "$workdir/2"

# diff exits 1 when the listings differ; that is a result, not an error.
diff "$workdir/1" "$workdir/2" || true

umount /1
umount /2
zfs destroy "$tmp"
root@dotscience-runner-lukeperf7:~# time ./zfs-fastdiff.sh 8c296418-6195-4863-9256-c0cce5de95c2 > /dev/null
+ '[' -z 8c296418-6195-4863-9256-c0cce5de95c2 ']'
+ umount /1
+ umount /2
++ zfs list -t snapshot,filesystem -r pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2
++ grep -v dotmesh-tmp-diff
++ grep -v zfs-diff
++ tail -n 1
++ awk '{print $1}'
+ latest=pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2@c911722f-3a46-4187-9b5e-ad7b916cb552
+ tmp=pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2@dotmesh-tmp-diff
+ zfs destroy pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2@dotmesh-tmp-diff
+ zfs snapshot pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2@dotmesh-tmp-diff
+ mount -t zfs pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2@c911722f-3a46-4187-9b5e-ad7b916cb552 /1
+ mount -t zfs pool/dmfs/8c296418-6195-4863-9256-c0cce5de95c2@dotmesh-tmp-diff /2
+ cd /1
+ find .
+ cd /2
+ find .
+ diff 1 2
real 0m0.767s
user 0m0.202s
sys 0m0.157s
dynamic mountpoint version (for concurrent use)
#!/bin/bash
#
# Dynamic-mountpoint variant of the zfs fast-diff prototype.
#
# Takes a temporary snapshot of pool/dmfs/<FSID>, mounts it and the last
# entry that `zfs list` reports for the dataset on per-FSID mount points
# under $DOTMESH_HOME, and diffs "mtime size path" listings of the two trees.
#
# Usage:   <script> <FSID>
# Output:  the `diff` of the two listings on stdout; the `set -x` trace goes
#          to stderr.
# Exit:    non-zero if no FSID is given or any zfs/mount step fails.
#
# Mount points are derived from the FSID, so runs for different filesystems
# do not share them. The temporary snapshot name is fixed per filesystem, so
# two runs for the SAME FSID still conflict.
set -xe

DOTMESH_HOME=/var/lib/dotmesh
FSID=$1
if [ -z "$FSID" ]; then
  echo "Please specify zfs dataset as first arg" >&2
  exit 1
fi

dataset="pool/dmfs/${FSID}"

# Last line printed by `zfs list` for the dataset, ignoring temporary diff
# snapshots. NOTE(review): this is presumably the most recent snapshot (or
# the filesystem itself when there are none); that relies on the default
# ordering of `zfs list` - confirm.
latest=$(zfs list -t snapshot,filesystem -r "$dataset" \
  | grep -v dotmesh-tmp-diff \
  | grep -v zfs-diff \
  | tail -n 1 \
  | awk '{print $1}')

# The pipeline's status is awk's, so a failed `zfs list` is not caught by
# `set -e`; detect it through the empty result instead.
if [ -z "$latest" ]; then
  echo "Could not find anything to diff against for $dataset" >&2
  exit 1
fi

tmp="${dataset}@dotmesh-tmp-diff"
latest_mnt="${DOTMESH_HOME}/diff-latest-${FSID}"
tmp_mnt="${DOTMESH_HOME}/diff-tmp-${FSID}"

mkdir -p "$latest_mnt"
mkdir -p "$tmp_mnt"

# Best effort: release whatever an earlier, interrupted run left behind.
umount "$latest_mnt" || true
umount "$tmp_mnt" || true
zfs destroy "$tmp" || true

zfs snapshot "$tmp"
mount -t zfs "$latest" "$latest_mnt"
# The temporary snapshot goes on its own mount point. It used to be mounted
# over $latest_mnt, which hid the snapshot being compared against and left
# $tmp_mnt empty and unmounted.
mount -t zfs "$tmp" "$tmp_mnt"

# Keep the listings in a private scratch directory so that concurrent runs
# do not overwrite each other's files in a shared working directory.
workdir=$(mktemp -d)
trap 'rm -rf -- "$workdir"' EXIT

(cd "$latest_mnt" && find . -printf "%T+ %s %p\n") > "$workdir/latest_files"
(cd "$tmp_mnt" && find . -printf "%T+ %s %p\n") > "$workdir/tmp_files"

# diff exits 1 when the listings differ; that is a result, not an error.
diff "$workdir/latest_files" "$workdir/tmp_files" || true

umount "$latest_mnt"
umount "$tmp_mnt"
rmdir "$latest_mnt"
rmdir "$tmp_mnt"
zfs destroy "$tmp"
We could always race these and accept the answer from whichever responds first.
prototype an alternative ... maybe something like
dotmesh diff stream, start stream…
diff $(find $latest) $(find $tmp1)
and cache this result R. Second time:
diff $(find $tmp1) $(find $tmp2)
and combine this with cached result R. Nth time:
when the stream is disconnected, clean up $tmp1 and $tmp2 if they exist