# dataset creation
[root@localhost ~]# zfs create tank/fsmark -o compression=lz4 -o xattr=sa -o relatime=on
# create 10M files: 1M via fs_mark, then 9 copies of that directory
[root@localhost ~]# fs_mark -k -s0 -S0 -D10 -N1000 -n 1000000 -d /tank/fsmark/0
[root@localhost ~]# for i in `seq 1 9`; do cp -a /tank/fsmark/0 /tank/fsmark/$i; done
# reset ARC
[root@localhost ~]# zpool export tank; zpool import tank
[root@localhost ~]# arc_summary -s arc | grep "ARC size\|Target\|Dnode"
ARC size (current): 0.2 % 5.9 MiB
Target size (adaptive): 65.4 % 2.5 GiB
Dnode cache target: 10.0 % 384.1 MiB
Dnode cache size: 0.1 % 198.0 KiB
# stat dnodes via du -hs
[root@localhost ~]# du -hs /tank/fsmark
# arc_c (the "c" column below) collapsed to 240M...
[root@localhost ~]# arcstat 1
time read ddread ddh% dmread dmh% pread ph% size c avail
23:16:49 0 0 0 0 0 0 0 1.1G 2.5G 5.8G
23:16:50 3 0 0 3 100 0 0 1.1G 2.5G 5.8G
23:16:51 231K 0 0 230K 99 1.0K 100 1.2G 2.5G 5.6G
23:16:52 276K 0 0 275K 99 1.0K 100 1.4G 2.5G 5.2G
23:16:53 291K 0 0 291K 99 0 0 1.5G 2.5G 4.9G
23:16:54 278K 0 0 277K 99 1.0K 100 1.7G 2.5G 4.6G
23:16:55 276K 0 0 275K 99 1.0K 100 1.8G 2.5G 4.3G
23:16:56 280K 0 0 279K 99 1.0K 100 2.0G 2.5G 4.0G
23:16:57 285K 0 0 285K 99 0 0 2.1G 2.5G 3.7G
23:16:58 274K 0 0 273K 99 1.0K 100 2.3G 2.5G 3.4G
23:16:59 274K 0 0 273K 99 1.0K 100 2.4G 2.5G 3.0G
23:17:00 286K 0 0 286K 99 0 0 2.6G 2.6G 2.7G
23:17:01 274K 0 0 273K 99 1.0K 100 2.7G 2.8G 2.4G
23:17:02 275K 0 0 274K 99 1.0K 100 2.9G 2.9G 2.1G
23:17:03 285K 0 0 285K 99 0 0 3.0G 3.1G 1.8G
23:17:04 272K 0 0 271K 99 1.0K 100 3.2G 3.2G 1.4G
23:17:05 268K 0 0 267K 99 1.0K 100 3.3G 3.4G 1.1G
23:17:06 281K 0 0 281K 99 0 0 3.5G 3.5G 827M
23:17:07 269K 0 0 268K 99 1.0K 100 3.6G 3.7G 506M
23:17:08 266K 0 0 265K 99 1.0K 100 3.8G 3.8G 217M
23:17:09 275K 0 0 275K 99 0 0 3.8G 3.8G 43M
23:17:10 260K 0 0 259K 99 1.0K 0 3.8G 3.8G -106M
23:17:11 244K 0 0 243K 99 1.0K 0 3.7G 3.6G -66M
...
23:19:10 75K 0 0 75K 87 0 0 3.4G 240M -123M
23:19:11 73K 0 0 73K 86 0 0 3.2G 240M 116M
23:19:12 67K 0 0 67K 87 0 0 3.3G 240M 79M
23:19:13 41K 0 0 39K 88 1.5K 3 3.3G 240M 44M
23:19:14 56K 0 0 56K 87 0 0 3.3G 240M -5.5M
23:19:15 70K 0 0 70K 87 0 0 3.3G 240M -76M
23:19:16 67K 0 0 67K 86 0 0 3.2G 240M 142M
...
23:26:40 0 0 0 0 0 0 0 3.3G 240M -15M
23:26:41 0 0 0 0 0 0 0 3.3G 240M -15M
23:26:42 0 0 0 0 0 0 0 3.3G 240M -15M
23:26:43 0 0 0 0 0 0 0 3.3G 240M -15M
# ...but the dnode cache remains way over quota even after du has finished
[root@localhost ~]# arc_summary -s arc | grep "ARC size\|Target\|Dnode"
ARC size (current): 88.1 % 3.3 GiB
Target size (adaptive): 6.2 % 240.1 MiB
Dnode cache target: 10.0 % 384.1 MiB
Dnode cache size: 367.1 % 1.4 GiB
# at this point, getting other data or metadata in cache is quite difficult
# notice the slow speed of consecutive sequential reads of a previously-written 1G file
[root@localhost ~]# for i in `seq 1 3`; do dd if=/tank/fsmark/random.img of=/dev/null bs=1M count=1024 status=progress; done
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.624737 s, 1.7 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.599355 s, 1.8 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.602586 s, 1.8 GB/s
# arcstat shows the prefetched reads are not served from cache (ph% stays at 0)
time read ddread ddh% dmread dmh% pread ph% size c avail
00:24:54 0 0 0 0 0 0 0 3.3G 1.5G 2.5M
00:24:55 52K 13K 99 26K 99 13K 0 3.3G 1.5G -32M
00:24:56 45K 11K 99 22K 100 11K 0 3.3G 1.4G -30M
00:24:57 0 0 0 0 0 0 0 3.3G 1.4G -24M
# drop caches, now ARC caching works correctly
[root@localhost ~]# echo 2 > /proc/sys/vm/drop_caches
[root@localhost ~]# for i in `seq 1 3`; do dd if=/tank/fsmark/random.img of=/dev/null bs=1M count=1024 status=progress; done
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.183439 s, 5.9 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.111491 s, 9.6 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.109994 s, 9.8 GB/s
System information
Describe the problem you're observing
The dnode cache does not shrink even when it is over quota and the system is under memory pressure.
Describe how to reproduce the problem
See the example terminal transcript included in this report.
Include any warning/errors/backtraces from the system logs
None