[Bugs] [Bug 1410425] [GNFS+EC] Cthon failures/issues with Lock/ Special Test cases on disperse volume with GNFS mount
bugzilla at redhat.com
bugzilla at redhat.com
Thu Jan 5 13:17:30 UTC 2017
https://bugzilla.redhat.com/show_bug.cgi?id=1410425
--- Comment #1 from Pranith Kumar K <pkarampu at redhat.com> ---
Looks like there is an issue with posix locks in EC xlator.
Test #3 - Try to lock just the 1st byte.
Parent: 3.0 - F_TLOCK [ 0, 1] PASSED.
Child: 3.1 - F_TEST [ 0, 1] PASSED.
Child: 3.2 - F_TEST [ 0, ENDING] PASSED.
Child: 3.3 - F_TEST [ 1, 1] FAILED!
Child: **** Expected success, returned EACCES...
Child: **** Probably implementation error.
Here the parent process holds a lock starting at byte 0 with length 1. The child
process is trying to take a lock starting at byte 1 with length 1, which should
ideally have been granted, since the two byte ranges do not overlap.
(gdb) bt
#0 client3_3_lk (frame=0x7f8e5388bdbc, this=0x7f8e44020b00,
data=0x7f8e48db7ec0) at client-rpc-fops.c:5349
#1 0x00007f8e4836e4be in client_lk (frame=0x7f8e5388bdbc, this=<optimized
out>, fd=<optimized out>, cmd=5,
lock=<optimized out>, xdata=<optimized out>) at client.c:1652
#2 0x00007f8e4370204d in ec_wind_lk (ec=0x7f8e44067700, fop=0x7f8e380dc06c,
idx=0) at ec-locks.c:1003
#3 0x00007f8e436f7f08 in ec_dispatch_mask (fop=fop@entry=0x7f8e380dc06c,
mask=63) at ec-common.c:521
#4 0x00007f8e436f8159 in ec_dispatch_all (fop=fop@entry=0x7f8e380dc06c) at
ec-common.c:597
#5 0x00007f8e43704280 in ec_manager_lk (fop=0x7f8e380dc06c, state=<optimized
out>) at ec-locks.c:1028
#6 0x00007f8e436f779b in __ec_manager (fop=0x7f8e380dc06c, error=0) at
ec-common.c:2287
#7 0x00007f8e436f148c in ec_gf_lk (frame=<optimized out>, this=<optimized
out>, fd=<optimized out>,
cmd=<optimized out>, flock=<optimized out>, xdata=<optimized out>) at
ec.c:888
#8 0x00007f8e48141865 in dht_lk (frame=frame@entry=0x7f8e53894808,
this=this@entry=0x7f8e440320f0,
fd=fd@entry=0x7f8e5609d0f8, cmd=cmd@entry=5,
flock=flock@entry=0x7f8e48db87a0, xdata=xdata@entry=0x0)
at dht-inode-read.c:1067
#9 0x00007f8e55e24e39 in default_lk (frame=0x7f8e53894808, this=<optimized
out>, fd=0x7f8e5609d0f8, cmd=5,
lock=0x7f8e48db87a0, xdata=0x0) at defaults.c:2524
#10 0x00007f8e43bd39e4 in io_stats_lk (frame=0x7f8e53889a20,
this=0x7f8e44033e70, fd=0x7f8e5609d0f8, cmd=5,
lock=0x7f8e48db87a0, xdata=0x0) at io-stats.c:3345
#11 0x00007f8e43495752 in nfs_fop_lk (nfsx=<optimized out>, xl=0x7f8e44033e70,
nfu=nfu@entry=0x7f8e48db8bc0,
fd=0x7f8e5609d0f8, cmd=cmd@entry=5, flock=flock@entry=0x7f8e48db87a0,
cbk=0x7f8e434be0c0 <nlm4svc_test_cbk>,
local=0x7f8e398cbd04) at nfs-fops.c:1565
#12 0x00007f8e43498105 in nfs_lk (nfsx=<optimized out>, xl=<optimized out>,
nfu=nfu@entry=0x7f8e48db8bc0,
fd=<optimized out>, cmd=cmd@entry=5, flock=flock@entry=0x7f8e48db87a0,
cbk=cbk@entry=0x7f8e434be0c0 <nlm4svc_test_cbk>,
local=local@entry=0x7f8e398cbd04) at nfs-generics.c:151
#13 0x00007f8e434be6bc in nlm4_test_fd_resume (carg=carg@entry=0x7f8e398cbd04)
at nlm4.c:806
#14 0x00007f8e434be74a in nlm4_test_resume (carg=0x7f8e398cbd04) at nlm4.c:830
#15 0x00007f8e434baa9c in nfs3_fh_resolve_inode_done
(cs=cs@entry=0x7f8e398cbd04, inode=inode@entry=0x7f8e41e25128)
at nfs3-helpers.c:3619
#16 0x00007f8e434bb2db in nfs3_fh_resolve_inode (cs=0x7f8e398cbd04) at
nfs3-helpers.c:3828
#17 0x00007f8e434bb385 in nfs3_fh_resolve_resume (cs=cs@entry=0x7f8e398cbd04)
at nfs3-helpers.c:3860
#18 0x00007f8e434bb5a8 in nfs3_fh_resolve_root (cs=cs@entry=0x7f8e398cbd04) at
nfs3-helpers.c:3915
#19 0x00007f8e434bb7f1 in nfs3_fh_resolve_and_resume
(cs=cs@entry=0x7f8e398cbd04, fh=fh@entry=0x7f8e48db98f0,
entry=entry@entry=0x0, resum_fn=resum_fn@entry=0x7f8e434be6f0
<nlm4_test_resume>) at nfs3-helpers.c:4011
#20 0x00007f8e434be311 in nlm4svc_test (req=0x7f8e48040b70) at nlm4.c:887
#21 0x00007f8e55b6f775 in rpcsvc_handle_rpc_call (svc=0x7f8e440480e0,
trans=trans@entry=0x7f8e44522130,
msg=<optimized out>) at rpcsvc.c:695
#22 0x00007f8e55b6f95b in rpcsvc_notify (trans=0x7f8e44522130,
mydata=<optimized out>, event=<optimized out>,
data=<optimized out>) at rpcsvc.c:789
#23 0x00007f8e55b71893 in rpc_transport_notify (this=this@entry=0x7f8e44522130,
event=event@entry=RPC_TRANSPORT_MSG_RECEIVED,
data=data@entry=0x7f8e44422750) at rpc-transport.c:538
#24 0x00007f8e4a6632d4 in socket_event_poll_in (this=this@entry=0x7f8e44522130)
at socket.c:2267
#25 0x00007f8e4a665785 in socket_event_handler (fd=<optimized out>, idx=31,
data=0x7f8e44522130, poll_in=1,
poll_out=0, poll_err=0) at socket.c:2397
#26 0x00007f8e55e05650 in event_dispatch_epoll_handler (event=0x7f8e48db9e80,
event_pool=0x7f8e5791df00)
at event-epoll.c:571
#27 event_dispatch_epoll_worker (data=0x7f8e579708b0) at event-epoll.c:674
#28 0x00007f8e54c0cdc5 in start_thread () from /lib64/libpthread.so.0
#29 0x00007f8e5455173d in clone () from /lib64/libc.so.6
(gdb) f 5
#5 0x00007f8e43704280 in ec_manager_lk (fop=0x7f8e380dc06c, state=<optimized
out>) at ec-locks.c:1028
1028 ec_dispatch_all(fop);
(gdb) l
1023 }
1024
1025 /* Fall through */
1026
1027 case EC_STATE_DISPATCH:
1028 ec_dispatch_all(fop);
1029
1030 return EC_STATE_PREPARE_ANSWER;
1031
1032 case EC_STATE_PREPARE_ANSWER:
(gdb) f 4
#4 0x00007f8e436f8159 in ec_dispatch_all (fop=fop@entry=0x7f8e380dc06c) at
ec-common.c:597
597 ec_dispatch_mask(fop, fop->remaining);
(gdb) l
592
593 if (ec_child_select(fop)) {
594 fop->expected = gf_bits_count(fop->remaining);
595 fop->first = 0;
596
597 ec_dispatch_mask(fop, fop->remaining);
598 }
599 }
600
601 void ec_dispatch_min(ec_fop_data_t * fop)
(gdb) f 3
#3 0x00007f8e436f7f08 in ec_dispatch_mask (fop=fop@entry=0x7f8e380dc06c,
mask=63) at ec-common.c:521
521 fop->wind(ec, fop, idx);
(gdb) l
516 idx = 0;
517 while (mask != 0)
518 {
519 if ((mask & 1) != 0)
520 {
521 fop->wind(ec, fop, idx);
522 }
523 idx++;
524 mask >>= 1;
525 }
(gdb) f 2
#2 0x00007f8e4370204d in ec_wind_lk (ec=0x7f8e44067700, fop=0x7f8e380dc06c,
idx=0) at ec-locks.c:1003
1003 STACK_WIND_COOKIE(fop->frame, ec_lk_cbk, (void *)(uintptr_t)idx,
(gdb) p fop->flock
$11 = {l_type = 1, l_whence = 0, l_start = 0, l_len = 512, l_pid = 164, l_owner
= {len = 36,
data = "164@dhcp46-30.lab.eng.blr.redhat.com", '\000' <repeats 987 times>}}
(gdb) f 8
#8 0x00007f8e48141865 in dht_lk (frame=frame@entry=0x7f8e53894808,
this=this@entry=0x7f8e440320f0,
fd=fd@entry=0x7f8e5609d0f8, cmd=cmd@entry=5,
flock=flock@entry=0x7f8e48db87a0, xdata=xdata@entry=0x0)
at dht-inode-read.c:1067
1067 STACK_WIND (frame, dht_lk_cbk, lock_subvol,
lock_subvol->fops->lk, fd,
(gdb) p *flock
$12 = {l_type = 1, l_whence = 0, l_start = 1, l_len = 1, l_pid = 164, l_owner =
{len = 36,
data = "164@dhcp46-30.lab.eng.blr.redhat.com", '\000' <repeats 987 times>}}
>>>> As we can see above, up to the dht layer flock->l_start was 1 and flock->l_len was 1.
(gdb) f 7
#7 0x00007f8e436f148c in ec_gf_lk (frame=<optimized out>, this=<optimized
out>, fd=<optimized out>,
cmd=<optimized out>, flock=<optimized out>, xdata=<optimized out>) at
ec.c:888
888 ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd,
(gdb) p *flock
value has been optimized out
(gdb) p fop
No symbol "fop" in current context.
(gdb) l
883 int32_t cmd, struct gf_flock * flock, dict_t * xdata)
884 {
885 int32_t minimum = EC_MINIMUM_ALL;
886 if (flock->l_type == F_UNLCK)
887 minimum = EC_MINIMUM_ONE;
888 ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd,
889 flock, xdata);
890
891 return 0;
892 }
(gdb) p *flock
value has been optimized out
(gdb) f 6
#6 0x00007f8e436f779b in __ec_manager (fop=0x7f8e380dc06c, error=0) at
ec-common.c:2287
2287 fop->state = fop->handler(fop, fop->state);
(gdb) p fop
$13 = (ec_fop_data_t *) 0x7f8e380dc06c
(gdb) p fop->flock
$14 = {l_type = 1, l_whence = 0, l_start = 0, l_len = 512, l_pid = 164, l_owner
= {len = 36,
data = "164@dhcp46-30.lab.eng.blr.redhat.com", '\000' <repeats 987 times>}}
(gdb)
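>>> But in the EC layer, fop->flock.l_start was reset to 0 and the lock length (fop->flock.l_len) was set to 512. The request therefore overlaps the parent's lock on byte 0, so the other process is denied access (EACCES) even though the range it asked for does not conflict.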
--
You are receiving this mail because:
You are on the CC list for the bug.
You are the assignee for the bug.
More information about the Bugs
mailing list