5

On one of my ubuntu server, top command start very slow, when I run top command on terminal, it will display info after more than 10 seconds.

Then I use strace -yy top to analysis the problem, I found top hang at the following point:

getsockopt(6<UNIX:[366379599]>, SOL_SOCKET, SO_SNDBUF, [212992], [4]) = 0                                                                 [0/1911]
setsockopt(6<UNIX:[366379599]>, SOL_SOCKET, SO_SNDBUFFORCE, [8388608], 4) = 0
connect(6<UNIX:[366379599]>, {sa_family=AF_UNIX, sun_path="/run/dbus/system_bus_socket"}, 29) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_PEERCRED, {pid=1, uid=0, gid=0}, [12]) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_PEERSEC, "unconfined", [64->10]) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_PEERGROUPS, "", [256->0]) = 0
fstat(6<UNIX:[366379599->366381191]>, {st_mode=S_IFSOCK|0777, st_size=0, ...}) = 0
getsockopt(6<UNIX:[366379599->366381191]>, SOL_SOCKET, SO_ACCEPTCONN, [0], [4]) = 0
getsockname(6<UNIX:[366379599->366381191]>, {sa_family=AF_UNIX}, [128->2]) = 0
geteuid()                               = 0
sendmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="\0AUTH EXTERNAL ", iov_len=15}, {iov_base="30", iov_len
=2}, {iov_base="\r\nNEGOTIATE_UNIX_FD\r\nBEGIN\r\n", iov_len=28}], msg_iovlen=3, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 45
gettid()                                = 25127
getrandom("\x00\x3c\x86\xd4\x34\xa0\xb2\x91\xee\x2d\x4b\x2c\x1b\x56\xda\xa8", 16, GRND_NONBLOCK) = 16
futex(0x7f00826d9ff0, FUTEX_WAKE_PRIVATE, 2147483647) = 0
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="OK 141ec8c80f29bf83dc576f5e61641"..., iov_len=256}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 52
sendmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\0\0\0\0\1\0\0\0m\0\0\0\1\1o\0\25\0\0\0/org/fre"
..., iov_len=128}], msg_iovlen=1, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 128
recvmsg(6<UNIX:[366379599->366381191]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable
)
ppoll([{fd=6<UNIX:[366379599->366381191]>, events=POLLIN}], 1, {tv_sec=24, tv_nsec=999897000}, NULL, 8) = 1 ([{fd=6, revents=POLLIN}], left {tv_sec=24, tv_nsec=999818992})
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\17\0\0\0\1\0\0\0E\0\0\0\6\1s\0\n\0\0\0", iov_le
n=24}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 24
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base=":1.2843938\0\0\0\0\0\0\5\1u\0\1\0\0\0\10\1g\0\1s\0\0"..
., iov_len=79}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 79
sendmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\4\0\0\0\2\0\0\0\247\0\0\0\1\1o\0\31\0\0\0/org/f
re"..., iov_len=184}, {iov_base="w\364\0\0", iov_len=4}], msg_iovlen=2, msg_controllen=0, msg_flags=0}, MSG_DONTWAIT|MSG_NOSIGNAL) = 188
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\4\1\1\17\0\0\0\2\0\0\0\225\0\0\0\1\1o\0\25\0\0\0", io
v_len=24}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 24
recvmsg(6<UNIX:[366379599->366381191]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="/org/freedesktop/DBus\0\0\0\2\1s\0\24\0\0\0"..., iov_le
n=159}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = 159
recvmsg(6<UNIX:[366379599->366381191]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_NOSIGNAL|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable
)
ppoll([{fd=6<UNIX:[366379599->366381191]>, events=POLLIN}], 1, {tv_sec=24, tv_nsec=999853000}, NULL, 8

And the command ls -l /proc/$(pgrep top)/fd/ get the following output:

lrwx------ 1 root root 64 1月  25 18:11 0 -> /dev/pts/9
lrwx------ 1 root root 64 1月  25 18:11 1 -> /dev/pts/9
l-wx------ 1 root root 64 1月  25 18:11 2 -> /dev/null
lrwx------ 1 root root 64 1月  25 18:11 3 -> /dev/pts/9
lr-x------ 1 root root 64 1月  25 18:11 4 -> /proc/uptime
lr-x------ 1 root root 64 1月  25 18:11 5 -> /proc
lrwx------ 1 root root 64 1月  25 18:11 6 -> 'socket:[364876425]'

lsof -d 6 -U -a +E -p $(pgrep top) get following output:

COMMAND     PID       USER   FD   TYPE             DEVICE SIZE/OFF      NODE NAME
dbus-daem   874 messagebus   12u  unix 0xffff9545f6fee400      0t0 366381191 /var/run/dbus/system_bus_socket type=STREAM ->INO=366379599 25127,top,6u
top       25127       root    6u  unix 0xffff9545f6fefc00      0t0 366379599 type=STREAM ->INO=366381191 874,dbus-daem,12u

After top works as normal, the command ls -l /proc/$(pgrep top)/fd/ get the following output:

lrwx------ 1 root root 64 1月  25 18:11 0 -> /dev/pts/9
lrwx------ 1 root root 64 1月  25 18:11 1 -> /dev/pts/9
l-wx------ 1 root root 64 1月  25 18:11 2 -> /dev/null
lrwx------ 1 root root 64 1月  25 18:11 3 -> /dev/pts/9
lr-x------ 1 root root 64 1月  25 18:11 4 -> /proc/uptime
lr-x------ 1 root root 64 1月  25 18:11 5 -> /proc/meminfo
lr-x------ 1 root root 64 1月  25 18:11 6 -> /proc/loadavg
lr-x------ 1 root root 64 1月  25 18:12 7 -> /proc/stat

lsof -d 6 -U -a +E -p $(pgrep top) output nothing.

I want to know the reason that why top command start so slow, can anyone help me?

0

1 Answer 1

0

When I look at my own strace, I see that the only recvmsg and poll happen when top is trying to communicate with the socket file of nscd (/var/run/nscd/socket).

socket(PF_LOCAL, SOCK_STREAM|SOCK_CLOEXEC|SOCK_NONBLOCK, 0) = 10
connect(10<UNIX:[355032774]>, {sa_family=AF_LOCAL, sun_path="/var/run/nscd/socket"}, 110) = 0
sendto(10<UNIX:[355032774->355027846]>, "\2\0\0\0\v\0\0\0\7\0\0\0passwd\0", 19, MSG_NOSIGNAL, NULL, 0) = 19
poll([{fd=10<UNIX:[355032774]>, events=POLLIN|POLLERR|POLLHUP}], 1, 5000) = 1 ([{fd=10, revents=POLLIN|POLLHUP}])
recvmsg(10<UNIX:[355032774]>, {msg_name(0)=NULL, msg_iov(2)=[{"passwd\0", 7}, {"\264K\5\0\0\0\0\0", 8}], msg_controllen=20, [{cmsg_len=20, cmsg_level=SOL_SOCKET, cmsg_type=SCM_RIGHTS, [11</run/nscd/passwd>]}], msg_flags=MSG_CMSG_CLOEXEC}, MSG_CMSG_CLOEXEC) = 15

If that's what you also see, you may have a problem with nscd. top tries to get passwd information from the nscd cache, and the process might be hung. Please try to stop nscd (service nscd stop) and see if it solves the problem. If it does solve the problem with top, try to restart nscd again (service nscd start) and check if after restart nscd works properly.

1
  • 1
    There is no nscd service on my ubuntu server. I have updated my question to show more strace info. Commented Jan 25, 2022 at 12:04

You must log in to answer this question.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.