[Bugs] [Bug 1393677] stat of file is hung with possible deadlock
bugzilla at redhat.com
bugzilla at redhat.com
Thu Nov 10 07:09:26 UTC 2016
https://bugzilla.redhat.com/show_bug.cgi?id=1393677
--- Comment #1 from Pranith Kumar K <pkarampu at redhat.com> ---
PStack info of the pid:
[root at rhs-client14 ~]# pstack 5377
Thread 10 (Thread 0x7fce12ab6700 (LWP 5378)):
#0 0x00007fce1abbb00d in nanosleep () from /lib64/libpthread.so.0
#1 0x00007fce1bade265 in gf_timer_proc () from /usr/lib64/libglusterfs.so.0
#2 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 9 (Thread 0x7fce120b5700 (LWP 5379)):
#0 0x00007fce1abb8465 in ?? () from /lib64/libpthread.so.0
#1 0x00007fce1baf4c8c in fd_ctx_dump () from /usr/lib64/libglusterfs.so.0
#2 0x00007fce1bade793 in inode_dump () from /usr/lib64/libglusterfs.so.0
#3 0x00007fce1bae0957 in inode_table_dump () from /usr/lib64/libglusterfs.so.0
#4 0x00007fce12aca289 in fuse_itable_dump () from
/usr/lib64/glusterfs/3.8.4/xlator/mount/fuse.so
#5 0x00007fce1bafdd92 in gf_proc_dump_xlator_info () from
/usr/lib64/libglusterfs.so.0
#6 0x00007fce1bafe6c5 in gf_proc_dump_info () from
/usr/lib64/libglusterfs.so.0
#7 0x00007fce1bfc53ed in glusterfs_sigwaiter ()
#8 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#9 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 8 (Thread 0x7fce116b4700 (LWP 5380)):
#0 0x00007fce1abb7a5e in pthread_cond_timedwait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00007fce1bb09cdc in syncenv_task () from /usr/lib64/libglusterfs.so.0
#2 0x00007fce1bb17d20 in syncenv_processor () from
/usr/lib64/libglusterfs.so.0
#3 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#4 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 7 (Thread 0x7fce0ea7e700 (LWP 5383)):
#0 0x00007fce1abba334 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007fce1abb55d8 in _L_lock_854 () from /lib64/libpthread.so.0
#2 0x00007fce1abb54a7 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007fce1bae1741 in inode_unref () from /usr/lib64/libglusterfs.so.0
#4 0x00007fce1bacdbf2 in loc_wipe () from /usr/lib64/libglusterfs.so.0
#5 0x00007fce0de3d94e in client_local_wipe () from
/usr/lib64/glusterfs/3.8.4/xlator/protocol/client.so
#6 0x00007fce0de5686c in client3_3_open_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/protocol/client.so
#7 0x00007fce1b89a925 in rpc_clnt_handle_reply () from
/usr/lib64/libgfrpc.so.0
#8 0x00007fce1b89ba8c in rpc_clnt_notify () from /usr/lib64/libgfrpc.so.0
#9 0x00007fce1b896bc8 in rpc_transport_notify () from /usr/lib64/libgfrpc.so.0
#10 0x00007fce100a456d in socket_event_poll_in () from
/usr/lib64/glusterfs/3.8.4/rpc-transport/socket.so
#11 0x00007fce100a585e in socket_event_handler () from
/usr/lib64/glusterfs/3.8.4/rpc-transport/socket.so
#12 0x00007fce1bb2cc96 in event_dispatch_epoll_worker () from
/usr/lib64/libglusterfs.so.0
#13 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#14 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 6 (Thread 0x7fce076ff700 (LWP 5384)):
#0 0x00007fce1abba334 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007fce1abb55d8 in _L_lock_854 () from /lib64/libpthread.so.0
#2 0x00007fce1abb54a7 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007fce1bae1741 in inode_unref () from /usr/lib64/libglusterfs.so.0
#4 0x00007fce1bacdbf2 in loc_wipe () from /usr/lib64/libglusterfs.so.0
#5 0x00007fce0cce1b7c in ob_fd_free () from
/usr/lib64/glusterfs/3.8.4/xlator/performance/open-behind.so
#6 0x00007fce0cce1dd0 in ob_wake_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/performance/open-behind.so
#7 0x00007fce1bb5e186 in default_open_cbk () from /usr/lib64/libglusterfs.so.0
#8 0x00007fce0d0f2c39 in ioc_open_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/performance/io-cache.so
#9 0x00007fce0d50f754 in ra_open_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/performance/read-ahead.so
#10 0x00007fce0d99139e in dht_open_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/cluster/distribute.so
#11 0x00007fce0dbdb26d in afr_open_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so
#12 0x00007fce0de5685a in client3_3_open_cbk () from
/usr/lib64/glusterfs/3.8.4/xlator/protocol/client.so
#13 0x00007fce1b89a925 in rpc_clnt_handle_reply () from
/usr/lib64/libgfrpc.so.0
#14 0x00007fce1b89ba8c in rpc_clnt_notify () from /usr/lib64/libgfrpc.so.0
#15 0x00007fce1b896bc8 in rpc_transport_notify () from /usr/lib64/libgfrpc.so.0
#16 0x00007fce100a456d in socket_event_poll_in () from
/usr/lib64/glusterfs/3.8.4/rpc-transport/socket.so
#17 0x00007fce100a585e in socket_event_handler () from
/usr/lib64/glusterfs/3.8.4/rpc-transport/socket.so
#18 0x00007fce1bb2cc96 in event_dispatch_epoll_worker () from
/usr/lib64/libglusterfs.so.0
#19 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#20 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 5 (Thread 0x7fce0c26e700 (LWP 5385)):
#0 0x00007fce1abb7a5e in pthread_cond_timedwait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00007fce0c8c1a6c in iot_worker () from
/usr/lib64/glusterfs/3.8.4/xlator/performance/io-threads.so
#2 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 4 (Thread 0x7fcdf7efe700 (LWP 5389)):
#0 0x00007fce1abba334 in __lll_lock_wait () from /lib64/libpthread.so.0
#1 0x00007fce1abb55d8 in _L_lock_854 () from /lib64/libpthread.so.0
#2 0x00007fce1abb54a7 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3 0x00007fce1bae1ac1 in inode_ref () from /usr/lib64/libglusterfs.so.0
#4 0x00007fce12abc01f in fuse_ino_to_inode () from
/usr/lib64/glusterfs/3.8.4/xlator/mount/fuse.so
#5 0x00007fce12abd01d in fuse_resolve_inode_init () from
/usr/lib64/glusterfs/3.8.4/xlator/mount/fuse.so
#6 0x00007fce12ac832a in fuse_getattr () from
/usr/lib64/glusterfs/3.8.4/xlator/mount/fuse.so
#7 0x00007fce12ad6270 in fuse_thread_proc () from
/usr/lib64/glusterfs/3.8.4/xlator/mount/fuse.so
#8 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#9 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 3 (Thread 0x7fcdf74fd700 (LWP 5390)):
#0 0x00007fce1abb768c in pthread_cond_wait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00007fce12ac7d9b in notify_kernel_loop () from
/usr/lib64/glusterfs/3.8.4/xlator/mount/fuse.so
#2 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#3 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 2 (Thread 0x7fcca2ffa700 (LWP 6850)):
#0 0x00007fce1abb7a5e in pthread_cond_timedwait@@GLIBC_2.3.2 () from
/lib64/libpthread.so.0
#1 0x00007fce1bb09cdc in syncenv_task () from /usr/lib64/libglusterfs.so.0
#2 0x00007fce1bb17d20 in syncenv_processor () from
/usr/lib64/libglusterfs.so.0
#3 0x00007fce1abb3aa1 in start_thread () from /lib64/libpthread.so.0
#4 0x00007fce1a51caad in clone () from /lib64/libc.so.6
Thread 1 (Thread 0x7fce1bfa8740 (LWP 5377)):
#0 0x00007fce1abb42fd in pthread_join () from /lib64/libpthread.so.0
#1 0x00007fce1bb2c75d in event_dispatch_epoll () from
/usr/lib64/libglusterfs.so.0
#2 0x00007fce1bfc749b in main ()
This is the code path we found, based on this backtrace, which leads to the deadlock:
==========================================================================
open-behind (Thread 6) takes fd->lock and then inode->lock, whereas statedump
(Thread 9) takes inode->lock and then fd->lock, so the inverted lock order leads to a deadlock:
/*
 * Release an ob_fd_t together with the resources it holds: wipe its loc,
 * unref its xdata dict if one is set, destroy the call stack rooted at
 * open_frame->root if an open_frame is set, then free the structure.
 *
 * NOTE(review): per the backtrace in this report, loc_wipe() calls
 * inode_unref(), which blocks in pthread_mutex_lock() (annotated below as
 * inode->lock). Calling this function with fd->lock held therefore nests
 * that lock inside fd->lock.
 */
void
ob_fd_free (ob_fd_t *ob_fd)
{
loc_wipe (&ob_fd->loc); <<--- this takes (inode->lock)
/* xdata is optional; only unref it when present. */
if (ob_fd->xdata)
dict_unref (ob_fd->xdata);
/* open_frame is optional; when present, tear down its whole call stack. */
if (ob_fd->open_frame)
STACK_DESTROY (ob_fd->open_frame->root);
GF_FREE (ob_fd);
}
int
ob_wake_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, fd_t *fd_ret, dict_t *xdata)
{
fd_t *fd = NULL;
struct list_head list;
ob_fd_t *ob_fd = NULL;
call_stub_t *stub = NULL, *tmp = NULL;
fd = frame->local;
frame->local = NULL;
INIT_LIST_HEAD (&list);
LOCK (&fd->lock); <<---- fd->lock
{
ob_fd = __ob_fd_ctx_get (this, fd);
list_splice_init (&ob_fd->list, &list);
if (op_ret < 0) {
/* mark fd BAD for ever */
ob_fd->op_errno = op_errno;
} else {
__fd_ctx_del (fd, this, NULL);
ob_fd_free (ob_fd);
}
}
UNLOCK (&fd->lock);
==============================================================
inode_dump (inode_t *inode, char *prefix)
{
int ret = -1;
xlator_t *xl = NULL;
int i = 0;
fd_t *fd = NULL;
struct _inode_ctx *inode_ctx = NULL;
struct list_head fd_list;
if (!inode)
return;
INIT_LIST_HEAD (&fd_list);
ret = TRY_LOCK(&inode->lock); <<---- takes inode->lock
if (ret != 0) {
return;
}
{
gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
gf_proc_dump_write("fd-count", "%u", inode->fd_count);
gf_proc_dump_write("ref", "%u", inode->ref);
gf_proc_dump_write("ia_type", "%d", inode->ia_type);
if (inode->_ctx) {
inode_ctx = GF_CALLOC (inode->table->ctxcount,
sizeof (*inode_ctx),
gf_common_mt_inode_ctx);
if (inode_ctx == NULL) {
goto unlock;
}
for (i = 0; i < inode->table->ctxcount;
i++) {
inode_ctx[i] = inode->_ctx[i];
}
}
if (dump_options.xl_options.dump_fdctx != _gf_true)
goto unlock;
list_for_each_entry (fd, &inode->fd_list, inode_list) {
fd_ctx_dump (fd, prefix); <<<-----------------
}
}
fd_ctx_dump (fd_t *fd, char *prefix)
{
struct _fd_ctx *fd_ctx = NULL;
xlator_t *xl = NULL;
int i = 0;
if ((fd == NULL) || (fd->_ctx == NULL)) {
goto out;
}
LOCK (&fd->lock); <<<-------------------
{
if (fd->_ctx != NULL) {
fd_ctx = GF_CALLOC (fd->xl_count, sizeof (*fd_ctx),
gf_common_mt_fd_ctx);
if (fd_ctx == NULL) {
goto unlock;
}
for (i = 0; i < fd->xl_count; i++) {
fd_ctx[i] = fd->_ctx[i];
}
}
}
--
You are receiving this mail because:
You are on the CC list for the bug.
You are the assignee for the bug.
More information about the Bugs
mailing list