root@truenas[~]# for n in {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,aa,ab,ac,ad,ae,af,ag}; do zdb -l "/dev/sd"$n"2" | grep 'name\|txg' done name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 name: 'Home' txg: 3080242 hostname: 'truenas' 
create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 create_txg: 2241036 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 name: 'NZB-Scratch' txg: 2265203 hostname: 'truenas' create_txg: 4 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 create_txg: 203597 name: 'boot-pool' txg: 6790201 hostname: '(none)' create_txg: 4 create_txg: 4 create_txg: 4 name: 'boot-pool' txg: 6790201 hostname: '(none)' create_txg: 4 create_txg: 4 create_txg: 4 #6 drives without swap partition root@truenas[~]# for n in {a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z}; do zdb -l "/dev/sd"$n"1" | grep 'name\|txg' done name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 
name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 name: 'Home' txg: 3080242 hostname: 'truenas' create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030 create_txg: 2994030
echo 0 >> /sys/module/zfs/parameters/spa_load_verify_data echo 0 >> /sys/module/zfs/parameters/spa_load_verify_metadata zpool import -f -T 3080241 Home
Tried 3080240, got another reboot. Yep. We might need to change something to stop it immediately rebooting on panic/failure though if it continues to do it.
root@truenas[~]# zpool import pool: NZB-Scratch id: 9983942308470126613 state: ONLINE action: The pool can be imported using its name or numeric identifier. config: NZB-Scratch ONLINE 0f5eeaa9-3419-49f0-8601-f7132129256b ONLINE pool: Home id: 4985077989531387090 state: FAULTED status: The pool metadata is corrupted. action: The pool cannot be imported due to damaged devices or data. The pool may be active on another system, but can be imported using the '-f' flag. see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-72 config: Home FAULTED corrupted data raidz2-0 ONLINE a7d78b0d-f891-11ed-a2f8-90e2baf17bf0 ONLINE a7b00eef-f891-11ed-a2f8-90e2baf17bf0 ONLINE a7d01f81-f891-11ed-a2f8-90e2baf17bf0 ONLINE a7c951e3-f891-11ed-a2f8-90e2baf17bf0 ONLINE a7bfef1b-f891-11ed-a2f8-90e2baf17bf0 ONLINE e4f37ae1-f494-4baf-94e5-07db0c38cb0c ONLINE raidz2-1 ONLINE 8cca2c8f-39ee-40a6-88e0-24ddf3485aa0 ONLINE 74f3cc23-1b32-4faf-89cc-ba0cd72ba308 ONLINE 4e5f5b16-6c2b-4e6b-a907-3e1b9b1c4886 ONLINE cde58bb6-9d8e-4cdc-a1bf-847f459b459b ONLINE 58c22778-521b-4e8f-aadd-6d5ad17a8f68 ONLINE 33633f68-920b-4a40-bd4d-45e30b6872bc ONLINE raidz2-2 ONLINE 2a2e5211-d4ea-4da9-8ea5-bdabdc542bdb ONLINE 56c07fd7-6cb6-4985-9a20-2b5ff9d42631 ONLINE 1147286d-8cd8-4025-8e5d-bbf06e2bd795 ONLINE 7e1fa408-7565-4913-b045-49447ef9253b ONLINE 3d56d2fa-d505-4bea-b9a2-80c121e4e559 ONLINE a9906b32-2690-4f7b-8d8f-00ca915d8f3d ONLINE raidz2-5 ONLINE b8c63108-353b-4ed7-a927-ca3df817bd21 ONLINE 58782264-02f1-41c6-9b91-d07144cb0ccb ONLINE 03df98a5-a86d-4bc8-879a-5cf611d4306c ONLINE 022c7ffb-0a07-45cb-b3af-ad1730a08054 ONLINE a5786a1f-a7ad-4a30-877a-88a03c94a774 ONLINE 4c59238e-5cbd-428e-8a72-a018d9dae9c2 ONLINE logs mirror-6 ONLINE 5ba1f70b-be51-470f-94ed-777683425477 ONLINE f2605776-46a9-4455-a4bc-322d4cf8a688 ONLINE
failmode
property. If it is set to panic, then it may very well reboot. zdb
.echo 0 >> /sys/module/zfs/parameters/spa_load_verify_data
echo 0 >> /sys/module/zfs/parameters/spa_load_verify_metadata
zpool import -f -T 3080241 -o failmode=continue Home
continue
does not work, perhaps wait
will. I'll give that a shot, I edited my last post, but should the pool be reporting "The pool metadata is corrupted" if I run 'zpool import' due to the process of trying to roll back? You might check the pool's failmode
property. If it is set to panic, then it may very well reboot.
Of course, checking an exported pool's property would be tricky. I don't know how to do it with zdb
.
But, there may be an easier way;
echo 0 >> /sys/module/zfs/parameters/spa_load_verify_data echo 0 >> /sys/module/zfs/parameters/spa_load_verify_metadata zpool import -f -T 3080241 -o failmode=continue Home
If continue
does not work, perhaps wait
will.
Neither wait nor continue kept the machine from rebooting. echo 0 >> /sys/module/zfs/parameters/spa_load_verify_data echo 0 >> /sys/module/zfs/parameters/spa_load_verify_metadata zpool import -f -T 3080241 -o failmode=continue Home
If continue
does not work, perhaps wait
will.
root@truenas[~]# service zfs-import-cache status × zfs-import-cache.service - Import ZFS pools by cache file Loaded: loaded (/lib/systemd/system/zfs-import-cache.service; enabled; preset: disabled) Active: failed (Result: exit-code) since Sat 2023-11-18 07:33:42 CST; 6min ago Docs: man:zpool(8) Main PID: 1897 (code=exited, status=1/FAILURE) CPU: 209ms Nov 18 07:33:39 truenas zpool[1897]: cannot import 'Home': I/O error Nov 18 07:33:42 truenas zpool[1897]: cannot import 'Home': I/O error Nov 18 07:33:42 truenas zpool[1897]: Destroy and re-create the pool from Nov 18 07:33:42 truenas zpool[1897]: a backup source. Nov 18 07:33:42 truenas zpool[1897]: cachefile import failed, retrying Nov 18 07:33:42 truenas zpool[1897]: Destroy and re-create the pool from Nov 18 07:33:42 truenas zpool[1897]: a backup source. Nov 18 07:33:42 truenas systemd[1]: zfs-import-cache.service: Main process exited, code=exited, status=1/FAILURE Nov 18 07:33:42 truenas systemd[1]: zfs-import-cache.service: Failed with result 'exit-code'. Nov 18 07:33:42 truenas systemd[1]: Failed to start zfs-import-cache.service - Import ZFS pools by cache file.
root@truenas[~]# journalctl -xeu zfs-import-cache.service Nov 18 07:33:31 truenas systemd[1]: Starting zfs-import-cache.service - Import ZFS pools by cache file... ░░ Subject: A start job for unit zfs-import-cache.service has begun execution ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ A start job for unit zfs-import-cache.service has begun execution. ░░ ░░ The job identifier is 184. Nov 18 07:33:39 truenas zpool[1897]: cannot import 'Home': I/O error Nov 18 07:33:42 truenas zpool[1897]: cannot import 'Home': I/O error Nov 18 07:33:42 truenas zpool[1897]: Destroy and re-create the pool from Nov 18 07:33:42 truenas zpool[1897]: a backup source. Nov 18 07:33:42 truenas zpool[1897]: cachefile import failed, retrying Nov 18 07:33:42 truenas zpool[1897]: Destroy and re-create the pool from Nov 18 07:33:42 truenas zpool[1897]: a backup source. Nov 18 07:33:42 truenas systemd[1]: zfs-import-cache.service: Main process exited, code=exited, status=1/FAILURE ░░ Subject: Unit process exited ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ An ExecStart= process belonging to unit zfs-import-cache.service has exited. ░░ ░░ The process' exit code is 'exited' and its exit status is 1. Nov 18 07:33:42 truenas systemd[1]: zfs-import-cache.service: Failed with result 'exit-code'. ░░ Subject: Unit failed ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ The unit zfs-import-cache.service has entered the 'failed' state with result 'exit-code'. Nov 18 07:33:42 truenas systemd[1]: Failed to start zfs-import-cache.service - Import ZFS pools by cache file. ░░ Subject: A start job for unit zfs-import-cache.service has failed ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ A start job for unit zfs-import-cache.service has finished with a failure. ░░ ░░ The job identifier is 184 and the job result is failed. 
Nov 18 07:41:28 truenas systemd[1]: Starting zfs-import-cache.service - Import ZFS pools by cache file... ░░ Subject: A start job for unit zfs-import-cache.service has begun execution ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ A start job for unit zfs-import-cache.service has begun execution. ░░ ░░ The job identifier is 911. Nov 18 07:41:30 truenas zpool[10279]: cannot import 'Home': I/O error Nov 18 07:41:32 truenas zpool[10279]: cannot import 'Home': I/O error Nov 18 07:41:32 truenas zpool[10279]: Destroy and re-create the pool from Nov 18 07:41:32 truenas zpool[10279]: a backup source. Nov 18 07:41:32 truenas zpool[10279]: cachefile import failed, retrying Nov 18 07:41:32 truenas zpool[10279]: Destroy and re-create the pool from Nov 18 07:41:32 truenas zpool[10279]: a backup source. Nov 18 07:41:32 truenas systemd[1]: zfs-import-cache.service: Main process exited, code=exited, status=1/FAILURE ░░ Subject: Unit process exited ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ An ExecStart= process belonging to unit zfs-import-cache.service has exited. ░░ ░░ The process' exit code is 'exited' and its exit status is 1. Nov 18 07:41:32 truenas systemd[1]: zfs-import-cache.service: Failed with result 'exit-code'. ░░ Subject: Unit failed ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ The unit zfs-import-cache.service has entered the 'failed' state with result 'exit-code'. Nov 18 07:41:32 truenas systemd[1]: Failed to start zfs-import-cache.service - Import ZFS pools by cache file. ░░ Subject: A start job for unit zfs-import-cache.service has failed ░░ Defined-By: systemd ░░ Support: https://www.debian.org/support ░░ ░░ A start job for unit zfs-import-cache.service has finished with a failure. ░░ ░░ The job identifier is 911 and the job result is failed.
I really hope that's not the case. Wouldn't TrueNAS prevent the user from doing something like that with at least a warning? I have no further suggestions.
It would be really good to know why the pool corruption occurred. Early on in this thread, enabling swap seems to have triggered the corruption. I wonder if SCALE tried to use the first partition as swap, damaging the first part of the new vDev disks.
Nov 18 08:07:09 truenas.local kernel: WARNING: Pool 'Home' has encountered an uncorrectable I/O failure and has been suspended.
Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset 59600, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset 61095, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset 60607, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset 1044, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset 261, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset for 60607, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset for 61095, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset for 1044, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset for 59600, error 5 Nov 18 09:26:54 truenas.local kernel: WARNING: can't open objset for 261, error 5
root@truenas[~]# echo 0 >> /sys/module/zfs/parameters/spa_load_verify_metadata root@truenas[~]# echo 0 >> /sys/module/zfs/parameters/spa_load_verify_data root@truenas[~]# zpool import -f -T 3080103 Home cannot mount 'Home/Media': Input/output error Import was successful, but unable to mount some datasets#
root@truenas[~]# zpool status -v Home pool: Home state: ONLINE status: One or more devices has experienced an error resulting in data corruption. Applications may be affected. action: Restore the file in question if possible. Otherwise restore the entire pool from backup. see: https://openzfs.github.io/openzfs-docs/msg/ZFS-8000-8A scan: resilvered 1.32T in 02:06:45 with 0 errors on Sat Nov 11 13:45:36 2023 config: NAME STATE READ WRITE CKSUM Home ONLINE 0 0 0 raidz2-0 ONLINE 0 0 0 a7d78b0d-f891-11ed-a2f8-90e2baf17bf0 ONLINE 0 0 1 a7b00eef-f891-11ed-a2f8-90e2baf17bf0 ONLINE 0 0 1 a7d01f81-f891-11ed-a2f8-90e2baf17bf0 ONLINE 0 0 1 a7c951e3-f891-11ed-a2f8-90e2baf17bf0 ONLINE 0 0 0 a7bfef1b-f891-11ed-a2f8-90e2baf17bf0 ONLINE 0 0 0 e4f37ae1-f494-4baf-94e5-07db0c38cb0c ONLINE 0 0 0 raidz2-1 ONLINE 0 0 0 8cca2c8f-39ee-40a6-88e0-24ddf3485aa0 ONLINE 0 0 2 74f3cc23-1b32-4faf-89cc-ba0cd72ba308 ONLINE 0 0 14 4e5f5b16-6c2b-4e6b-a907-3e1b9b1c4886 ONLINE 0 0 14 cde58bb6-9d8e-4cdc-a1bf-847f459b459b ONLINE 0 0 14 58c22778-521b-4e8f-aadd-6d5ad17a8f68 ONLINE 0 0 2 33633f68-920b-4a40-bd4d-45e30b6872bc ONLINE 0 0 2 raidz2-2 ONLINE 0 0 0 2a2e5211-d4ea-4da9-8ea5-bdabdc542bdb ONLINE 0 0 0 56c07fd7-6cb6-4985-9a20-2b5ff9d42631 ONLINE 0 0 0 1147286d-8cd8-4025-8e5d-bbf06e2bd795 ONLINE 0 0 12 7e1fa408-7565-4913-b045-49447ef9253b ONLINE 0 0 12 3d56d2fa-d505-4bea-b9a2-80c121e4e559 ONLINE 0 0 12 a9906b32-2690-4f7b-8d8f-00ca915d8f3d ONLINE 0 0 0 raidz2-5 ONLINE 0 0 0 b8c63108-353b-4ed7-a927-ca3df817bd21 ONLINE 0 0 0 58782264-02f1-41c6-9b91-d07144cb0ccb ONLINE 0 0 0 03df98a5-a86d-4bc8-879a-5cf611d4306c ONLINE 0 0 0 022c7ffb-0a07-45cb-b3af-ad1730a08054 ONLINE 0 0 0 a5786a1f-a7ad-4a30-877a-88a03c94a774 ONLINE 0 0 0 4c59238e-5cbd-428e-8a72-a018d9dae9c2 ONLINE 0 0 0 logs mirror-6 ONLINE 0 0 0 5ba1f70b-be51-470f-94ed-777683425477 ONLINE 0 0 0 f2605776-46a9-4455-a4bc-322d4cf8a688 ONLINE 0 0 0 errors: Permanent errors have been detected in the following files:
NR START END SECTORS SIZE NAME UUID 1 128 4194304 4194177 2G 2 4194432 39063650270 39059455839 18.2T
NR START END SECTORS SIZE NAME UUID 1 2048 39063650270 39063648223 18.2T
Hopefully you're not experiencing the other issues I am? Hey all, just chiming in, I have the same issue with one of our servers. Added a third z2 vdev after the 23.10.0.1 upgrade and am now seeing the same Mixed VDEV Capacities error.
Pre 23.10.0.1 vdev partx -s /dev/
Code:NR START END SECTORS SIZE NAME UUID 1 128 4194304 4194177 2G 2 4194432 39063650270 39059455839 18.2T
Post 23.10.0.1 vdev partx -s /dev/
Code:NR START END SECTORS SIZE NAME UUID 1 2048 39063650270 39063648223 18.2T