This seems to be working (should also be tested by the buildbots):
root@linux:~# cat /sys/module/zfs/version
0.7.0-181_g62df1bc
root@linux:~# /etc/init.d/zfs-zed restart ; /etc/init.d/zfs-zed status
Restarting zfs-zed (via systemctl): zfs-zed.service.
● zfs-zed.service - ZFS Event Daemon (zed)
Loaded: loaded (/usr/lib/systemd/system/zfs-zed.service; disabled)
Active: active (running) since Sun 2017-11-19 12:01:40 CET; 15ms ago
Docs: man:zed(8)
Main PID: 570 ((zed))
CGroup: /system.slice/zfs-zed.service
└─570 (zed)
root@linux:~# for i in 1 2 3 4 5
> do
> truncate -s 128m $tmpdir/virtualdisk$i.img
> losetup /dev/loop$i $tmpdir/virtualdisk$i.img
> done
root@linux:~# echo "0 131072 linear /dev/loop1 0" | dmsetup create sanedev1
root@linux:~# echo "0 131072 linear /dev/loop3 0" | dmsetup create sanedev2
root@linux:~# echo "0 131072 linear /dev/loop4 0" | dmsetup create sanedev3
root@linux:~# echo "0 131072 linear /dev/loop5 0" | dmsetup create sanedev4
root@linux:~# echo "0 65536 linear /dev/loop2 0
> 65536 5 error
> 65541 65531 linear /dev/loop2 65541" | dmsetup create errdev1
root@linux:~# zpool create -f diskerrors raidz /dev/mapper/sanedev1 /dev/mapper/sanedev2 /dev/mapper/errdev1 /dev/mapper/sanedev4 spare /dev/mapper/sanedev3
root@linux:~# zpool status -v
  pool: diskerrors
 state: ONLINE
  scan: none requested
config:

        NAME          STATE     READ WRITE CKSUM
        diskerrors    ONLINE       0     0     0
          raidz1-0    ONLINE       0     0     0
            sanedev1  ONLINE       0     0     0
            sanedev2  ONLINE       0     0     0
            errdev1   ONLINE       0     0     0
            sanedev4  ONLINE       0     0     0
        spares
          sanedev3    AVAIL

errors: No known data errors
root@linux:~# zpool events -c
cleared 21 events
root@linux:~# dd if=/dev/zero of=/diskerrors/data.bin &
[1] 866
root@linux:~# zpool events -f
TIME CLASS
Nov 19 2017 12:02:34.680000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.684000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.684000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.812000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.812000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.812000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.812000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.812000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.812000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.824000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.824000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.824000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.824000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.824000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.824000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.828000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.828000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.828000000 ereport.fs.zfs.io
Nov 19 2017 12:02:34.828000000 ereport.fs.zfs.io
Nov 19 2017 12:02:51.112000000 resource.fs.zfs.statechange
Nov 19 2017 12:02:54.176000000 sysevent.fs.zfs.config_sync
Nov 19 2017 12:02:54.204000000 sysevent.fs.zfs.vdev_spare
Nov 19 2017 12:02:54.204000000 sysevent.fs.zfs.vdev_attach
Nov 19 2017 12:02:56.404000000 sysevent.fs.zfs.resilver_start
Nov 19 2017 12:02:56.404000000 sysevent.fs.zfs.history_event
^C
root@linux:~# zpool status
  pool: diskerrors
 state: DEGRADED
status: One or more devices are faulted in response to persistent errors.
        Sufficient replicas exist for the pool to continue functioning in a
        degraded state.
action: Replace the faulted device, or use 'zpool clear' to mark the device
        repaired.
  scan: resilvered 9.30M in 0h0m with 0 errors on Sun Nov 19 12:03:00 2017
config:

        NAME            STATE     READ WRITE CKSUM
        diskerrors      DEGRADED     0     0     0
          raidz1-0      DEGRADED     0     0     0
            sanedev1    ONLINE       0     0     0
            sanedev2    ONLINE       0     0     0
            spare-2     DEGRADED     0     0     0
              errdev1   FAULTED      0     0     0  too many errors
              sanedev3  ONLINE       0     0     0
            sanedev4    ONLINE       0     0     0
        spares
          sanedev3      INUSE     currently in use

errors: No known data errors
root@linux:~#
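For reference, the dmsetup tables above use the standard device-mapper line format "<start sector> <number of sectors> <target> [target args]": errdev1 maps most of /dev/loop2 linearly but replaces a 5-sector window starting at sector 65536 with the "error" target, so any I/O that touches that window fails with EIO. A minimal standalone sketch of the same error-injection setup (the backing file path and loop device number below are illustrative, not taken from the session above):

# Create a small backing file and attach it to a spare loop device
truncate -s 128m /var/tmp/errdisk.img
losetup /dev/loop9 /var/tmp/errdisk.img

# Device-mapper table lines are: <start> <length> <target> [args]
# Map the first 64 MiB of the loop device, but punch a 5-sector
# hole at sector 65536 that returns EIO on every access.
dmsetup create errdev <<'EOF'
0 65536 linear /dev/loop9 0
65536 5 error
65541 65531 linear /dev/loop9 65541
EOF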
Please let me know if my process is flawed for having ZED bring a spare drive online. This test was designed to simulate EIO errors that ZED would detect and act upon. The detection appears to be working, but no action, such as faulting the drive and bringing the spare online, appeared to happen.
zpool create -f diskerrors raidz /dev/dm-0 /dev/dm-4 /dev/dm-2 /dev/dm-3 spare /dev/dm-1
zfs create diskerrors/coral-simulate-errors
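(As a side check, since this pool was built from the raw /dev/dm-N nodes rather than the /dev/mapper names, it may be worth confirming which dm-N node is actually the error-injection device before interpreting the results; the commands below assume the same dmsetup names as in the session above.)

# Show which /dev/dm-N node each device-mapper name resolves to
ls -l /dev/mapper/
dmsetup ls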
After a few seconds of running:
ZFS has detected an io error:
Error writing block 727, fd= 3
Error writing block 732, fd= 3 write: No space left on device
Here are the messages from /var/log/messages for this run:
Also, it does not appear that the debug log exists:
In case it just needed more errors, I re-ran the same iozone command a couple more times to generate more errors:
New messages in /var/log/messages:
It did not appear that I was able to trigger a hot spare coming online and the faulting of the drive that was taking errors.
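Regarding the missing debug log: ZED does not write one unless it is configured to, so its absence by itself may be expected. Assuming a stock zed.rc layout, it can usually be enabled by uncommenting ZED_DEBUG_LOG in /etc/zfs/zed.d/zed.rc (or by running zed in the foreground), which should make it easier to see whether the agents are reacting to the io ereports; the paths and defaults below are the usual ones rather than anything verified on this system.

# Enable ZED's debug log (default path is typically /tmp/zed.debug.log)
# by uncommenting the ZED_DEBUG_LOG line in zed.rc, then restart zed.
sed -i 's|^#ZED_DEBUG_LOG=|ZED_DEBUG_LOG=|' /etc/zfs/zed.d/zed.rc
systemctl restart zfs-zed

# Alternatively, run zed in the foreground with verbose output
# while reproducing the errors, to watch its event handling live.
zed -Fv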