Skip to content

dnode cache over quota but not shrinking even when under memory pressure #17487

@shodanshok

Description

@shodanshok

System information

Type Version/Name
Distribution Name Rocky Linux 9
Distribution Version 9.6
Kernel Version 5.14.0-570.21.1.el9_6.x86_64
Architecture x86_64
OpenZFS Version 2.2.8-1

Describe the problem you're observing

The dnode cache does not shrink even when it is over quota and the system is under memory pressure.

Describe how to reproduce the problem

Example below:

# dataset creation
[root@localhost ~]# zfs create tank/fsmark -o compression=lz4 -o xattr=sa -o relatime=on

# create 10M files
[root@localhost ~]# fs_mark -k -s0 -S0 -D10 -N1000 -n 1000000 -d /tank/fsmark/0
[root@localhost ~]# for i in `seq 1 9`; do cp -a /tank/fsmark/0 /tank/fsmark/$i; done

# reset ARC
[root@localhost ~]# zpool export tank; zpool import tank
[root@localhost ~]# arc_summary -s arc | grep "ARC size\|Target\|Dnode"
ARC size (current):                                     0.2 %    5.9 MiB
        Target size (adaptive):                        65.4 %    2.5 GiB
        Dnode cache target:                            10.0 %  384.1 MiB
        Dnode cache size:                               0.1 %  198.0 KiB

# stat dnodes via du -hs
[root@localhost ~]# du -hs /tank/fsmark

# arc_c collapsed...
[root@localhost ~]# arcstat 1
    time  read  ddread  ddh%  dmread  dmh%  pread  ph%   size      c  avail
23:16:49     0       0     0       0     0      0    0   1.1G   2.5G   5.8G
23:16:50     3       0     0       3   100      0    0   1.1G   2.5G   5.8G
23:16:51  231K       0     0    230K    99   1.0K  100   1.2G   2.5G   5.6G
23:16:52  276K       0     0    275K    99   1.0K  100   1.4G   2.5G   5.2G
23:16:53  291K       0     0    291K    99      0    0   1.5G   2.5G   4.9G
23:16:54  278K       0     0    277K    99   1.0K  100   1.7G   2.5G   4.6G
23:16:55  276K       0     0    275K    99   1.0K  100   1.8G   2.5G   4.3G
23:16:56  280K       0     0    279K    99   1.0K  100   2.0G   2.5G   4.0G
23:16:57  285K       0     0    285K    99      0    0   2.1G   2.5G   3.7G
23:16:58  274K       0     0    273K    99   1.0K  100   2.3G   2.5G   3.4G
23:16:59  274K       0     0    273K    99   1.0K  100   2.4G   2.5G   3.0G
23:17:00  286K       0     0    286K    99      0    0   2.6G   2.6G   2.7G
23:17:01  274K       0     0    273K    99   1.0K  100   2.7G   2.8G   2.4G
23:17:02  275K       0     0    274K    99   1.0K  100   2.9G   2.9G   2.1G
23:17:03  285K       0     0    285K    99      0    0   3.0G   3.1G   1.8G
23:17:04  272K       0     0    271K    99   1.0K  100   3.2G   3.2G   1.4G
23:17:05  268K       0     0    267K    99   1.0K  100   3.3G   3.4G   1.1G
23:17:06  281K       0     0    281K    99      0    0   3.5G   3.5G   827M
23:17:07  269K       0     0    268K    99   1.0K  100   3.6G   3.7G   506M
23:17:08  266K       0     0    265K    99   1.0K  100   3.8G   3.8G   217M
23:17:09  275K       0     0    275K    99      0    0   3.8G   3.8G    43M
23:17:10  260K       0     0    259K    99   1.0K    0   3.8G   3.8G  -106M
23:17:11  244K       0     0    243K    99   1.0K    0   3.7G   3.6G   -66M
...
23:19:10   75K       0     0     75K    87      0    0   3.4G   240M  -123M
23:19:11   73K       0     0     73K    86      0    0   3.2G   240M   116M
23:19:12   67K       0     0     67K    87      0    0   3.3G   240M    79M
23:19:13   41K       0     0     39K    88   1.5K    3   3.3G   240M    44M
23:19:14   56K       0     0     56K    87      0    0   3.3G   240M  -5.5M
23:19:15   70K       0     0     70K    87      0    0   3.3G   240M   -76M
23:19:16   67K       0     0     67K    86      0    0   3.2G   240M   142M
...
23:26:40     0       0     0       0     0      0    0   3.3G   240M   -15M
23:26:41     0       0     0       0     0      0    0   3.3G   240M   -15M
23:26:42     0       0     0       0     0      0    0   3.3G   240M   -15M
23:26:43     0       0     0       0     0      0    0   3.3G   240M   -15M

# ...but the dnode cache remains way over quota even after du has finished
[root@localhost ~]# arc_summary -s arc | grep "ARC size\|Target\|Dnode"
ARC size (current):                                    88.1 %    3.3 GiB
        Target size (adaptive):                         6.2 %  240.1 MiB
        Dnode cache target:                            10.0 %  384.1 MiB
        Dnode cache size:                             367.1 %    1.4 GiB

# at this point, getting other data or metadata in cache is quite difficult
# notice the slow speed of consecutive sequential reads of a previously-written 1G file
[root@localhost ~]# for i in `seq 1 3`; do dd if=/tank/fsmark/random.img of=/dev/null bs=1M count=1024 status=progress; done
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.624737 s, 1.7 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.599355 s, 1.8 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.602586 s, 1.8 GB/s

# arcstat shows uncached prefetched reads
    time  read  ddread  ddh%  dmread  dmh%  pread  ph%   size      c  avail
00:24:54     0       0     0       0     0      0    0   3.3G   1.5G   2.5M
00:24:55   52K     13K    99     26K    99    13K    0   3.3G   1.5G   -32M
00:24:56   45K     11K    99     22K   100    11K    0   3.3G   1.4G   -30M
00:24:57     0       0     0       0     0      0    0   3.3G   1.4G   -24M

# drop caches, now ARC caching works correctly
[root@localhost ~]# echo 2 > /proc/sys/vm/drop_caches
[root@localhost ~]# for i in `seq 1 3`; do dd if=/tank/fsmark/random.img of=/dev/null bs=1M count=1024 status=progress; done
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.183439 s, 5.9 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.111491 s, 9.6 GB/s
1024+0 records in
1024+0 records out
1073741824 bytes (1.1 GB, 1.0 GiB) copied, 0.109994 s, 9.8 GB/s

Include any warning/errors/backtraces from the system logs

None

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type: Defect — Incorrect behavior (e.g. crash, hang)

    Type

    No type
    No fields configured for issues without a type.

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions