[Bugs] [Bug 1410425] [GNFS+EC] Cthon failures/issues with Lock/ Special Test cases on disperse volume with GNFS mount

bugzilla at redhat.com bugzilla at redhat.com
Thu Jan 5 13:17:30 UTC 2017


https://bugzilla.redhat.com/show_bug.cgi?id=1410425



--- Comment #1 from Pranith Kumar K <pkarampu at redhat.com> ---
Looks like there is an issue with POSIX locks in the EC xlator.

Test #3 - Try to lock just the 1st byte.
    Parent: 3.0  - F_TLOCK [               0,               1] PASSED.
    Child:  3.1  - F_TEST  [               0,               1] PASSED.
    Child:  3.2  - F_TEST  [               0,          ENDING] PASSED.
    Child:  3.3  - F_TEST  [               1,               1] FAILED!
    Child:  **** Expected success, returned EACCES...
    Child:  **** Probably implementation error.

Here the parent process holds a lock starting at byte '0' with length '1'. The
child process is trying to take a lock starting at byte '1' with length '1',
which should ideally have been granted.


(gdb) bt
#0  client3_3_lk (frame=0x7f8e5388bdbc, this=0x7f8e44020b00,
data=0x7f8e48db7ec0) at client-rpc-fops.c:5349
#1  0x00007f8e4836e4be in client_lk (frame=0x7f8e5388bdbc, this=<optimized
out>, fd=<optimized out>, cmd=5, 
    lock=<optimized out>, xdata=<optimized out>) at client.c:1652
#2  0x00007f8e4370204d in ec_wind_lk (ec=0x7f8e44067700, fop=0x7f8e380dc06c,
idx=0) at ec-locks.c:1003
#3  0x00007f8e436f7f08 in ec_dispatch_mask (fop=fop at entry=0x7f8e380dc06c,
mask=63) at ec-common.c:521
#4  0x00007f8e436f8159 in ec_dispatch_all (fop=fop at entry=0x7f8e380dc06c) at
ec-common.c:597
#5  0x00007f8e43704280 in ec_manager_lk (fop=0x7f8e380dc06c, state=<optimized
out>) at ec-locks.c:1028
#6  0x00007f8e436f779b in __ec_manager (fop=0x7f8e380dc06c, error=0) at
ec-common.c:2287
#7  0x00007f8e436f148c in ec_gf_lk (frame=<optimized out>, this=<optimized
out>, fd=<optimized out>, 
    cmd=<optimized out>, flock=<optimized out>, xdata=<optimized out>) at
ec.c:888
#8  0x00007f8e48141865 in dht_lk (frame=frame at entry=0x7f8e53894808,
this=this at entry=0x7f8e440320f0, 
    fd=fd at entry=0x7f8e5609d0f8, cmd=cmd at entry=5,
flock=flock at entry=0x7f8e48db87a0, xdata=xdata at entry=0x0)
    at dht-inode-read.c:1067
#9  0x00007f8e55e24e39 in default_lk (frame=0x7f8e53894808, this=<optimized
out>, fd=0x7f8e5609d0f8, cmd=5, 
    lock=0x7f8e48db87a0, xdata=0x0) at defaults.c:2524
#10 0x00007f8e43bd39e4 in io_stats_lk (frame=0x7f8e53889a20,
this=0x7f8e44033e70, fd=0x7f8e5609d0f8, cmd=5, 
    lock=0x7f8e48db87a0, xdata=0x0) at io-stats.c:3345
#11 0x00007f8e43495752 in nfs_fop_lk (nfsx=<optimized out>, xl=0x7f8e44033e70,
nfu=nfu at entry=0x7f8e48db8bc0, 
    fd=0x7f8e5609d0f8, cmd=cmd at entry=5, flock=flock at entry=0x7f8e48db87a0,
cbk=0x7f8e434be0c0 <nlm4svc_test_cbk>, 
    local=0x7f8e398cbd04) at nfs-fops.c:1565
#12 0x00007f8e43498105 in nfs_lk (nfsx=<optimized out>, xl=<optimized out>,
nfu=nfu at entry=0x7f8e48db8bc0, 
    fd=<optimized out>, cmd=cmd at entry=5, flock=flock at entry=0x7f8e48db87a0, 
    cbk=cbk at entry=0x7f8e434be0c0 <nlm4svc_test_cbk>,
local=local at entry=0x7f8e398cbd04) at nfs-generics.c:151
#13 0x00007f8e434be6bc in nlm4_test_fd_resume (carg=carg at entry=0x7f8e398cbd04)
at nlm4.c:806
#14 0x00007f8e434be74a in nlm4_test_resume (carg=0x7f8e398cbd04) at nlm4.c:830
#15 0x00007f8e434baa9c in nfs3_fh_resolve_inode_done
(cs=cs at entry=0x7f8e398cbd04, inode=inode at entry=0x7f8e41e25128)
    at nfs3-helpers.c:3619
#16 0x00007f8e434bb2db in nfs3_fh_resolve_inode (cs=0x7f8e398cbd04) at
nfs3-helpers.c:3828
#17 0x00007f8e434bb385 in nfs3_fh_resolve_resume (cs=cs at entry=0x7f8e398cbd04)
at nfs3-helpers.c:3860
#18 0x00007f8e434bb5a8 in nfs3_fh_resolve_root (cs=cs at entry=0x7f8e398cbd04) at
nfs3-helpers.c:3915
#19 0x00007f8e434bb7f1 in nfs3_fh_resolve_and_resume
(cs=cs at entry=0x7f8e398cbd04, fh=fh at entry=0x7f8e48db98f0, 
    entry=entry at entry=0x0, resum_fn=resum_fn at entry=0x7f8e434be6f0
<nlm4_test_resume>) at nfs3-helpers.c:4011
#20 0x00007f8e434be311 in nlm4svc_test (req=0x7f8e48040b70) at nlm4.c:887
#21 0x00007f8e55b6f775 in rpcsvc_handle_rpc_call (svc=0x7f8e440480e0,
trans=trans at entry=0x7f8e44522130, 
    msg=<optimized out>) at rpcsvc.c:695
#22 0x00007f8e55b6f95b in rpcsvc_notify (trans=0x7f8e44522130,
mydata=<optimized out>, event=<optimized out>, 
    data=<optimized out>) at rpcsvc.c:789
#23 0x00007f8e55b71893 in rpc_transport_notify (this=this at entry=0x7f8e44522130, 
    event=event at entry=RPC_TRANSPORT_MSG_RECEIVED,
data=data at entry=0x7f8e44422750) at rpc-transport.c:538
#24 0x00007f8e4a6632d4 in socket_event_poll_in (this=this at entry=0x7f8e44522130)
at socket.c:2267
#25 0x00007f8e4a665785 in socket_event_handler (fd=<optimized out>, idx=31,
data=0x7f8e44522130, poll_in=1, 
    poll_out=0, poll_err=0) at socket.c:2397
#26 0x00007f8e55e05650 in event_dispatch_epoll_handler (event=0x7f8e48db9e80,
event_pool=0x7f8e5791df00)
    at event-epoll.c:571
#27 event_dispatch_epoll_worker (data=0x7f8e579708b0) at event-epoll.c:674
#28 0x00007f8e54c0cdc5 in start_thread () from /lib64/libpthread.so.0
#29 0x00007f8e5455173d in clone () from /lib64/libc.so.6
(gdb) f 5
#5  0x00007f8e43704280 in ec_manager_lk (fop=0x7f8e380dc06c, state=<optimized
out>) at ec-locks.c:1028
1028                ec_dispatch_all(fop);
(gdb) l
1023                }
1024    
1025            /* Fall through */
1026    
1027            case EC_STATE_DISPATCH:
1028                ec_dispatch_all(fop);
1029    
1030                return EC_STATE_PREPARE_ANSWER;
1031    
1032            case EC_STATE_PREPARE_ANSWER:
(gdb) f 4
#4  0x00007f8e436f8159 in ec_dispatch_all (fop=fop at entry=0x7f8e380dc06c) at
ec-common.c:597
597                    ec_dispatch_mask(fop, fop->remaining);
(gdb) l
592    
593            if (ec_child_select(fop)) {
594                    fop->expected = gf_bits_count(fop->remaining);
595                    fop->first = 0;
596    
597                    ec_dispatch_mask(fop, fop->remaining);
598            }
599    }
600    
601    void ec_dispatch_min(ec_fop_data_t * fop)
(gdb) f 3
#3  0x00007f8e436f7f08 in ec_dispatch_mask (fop=fop at entry=0x7f8e380dc06c,
mask=63) at ec-common.c:521
521                fop->wind(ec, fop, idx);
(gdb) l
516        idx = 0;
517        while (mask != 0)
518        {
519            if ((mask & 1) != 0)
520            {
521                fop->wind(ec, fop, idx);
522            }
523            idx++;
524            mask >>= 1;
525        }
(gdb) f 2
#2  0x00007f8e4370204d in ec_wind_lk (ec=0x7f8e44067700, fop=0x7f8e380dc06c,
idx=0) at ec-locks.c:1003
1003        STACK_WIND_COOKIE(fop->frame, ec_lk_cbk, (void *)(uintptr_t)idx,
(gdb) p fop->flock
$11 = {l_type = 1, l_whence = 0, l_start = 0, l_len = 512, l_pid = 164, l_owner
= {len = 36, 
    data = "164 at dhcp46-30.lab.eng.blr.redhat.com", '\000' <repeats 987 times>}}
(gdb) f 8
#8  0x00007f8e48141865 in dht_lk (frame=frame at entry=0x7f8e53894808,
this=this at entry=0x7f8e440320f0, 
    fd=fd at entry=0x7f8e5609d0f8, cmd=cmd at entry=5,
flock=flock at entry=0x7f8e48db87a0, xdata=xdata at entry=0x0)
    at dht-inode-read.c:1067
1067            STACK_WIND (frame, dht_lk_cbk, lock_subvol,
lock_subvol->fops->lk, fd,
(gdb) p *flock
$12 = {l_type = 1, l_whence = 0, l_start = 1, l_len = 1, l_pid = 164, l_owner =
{len = 36, 
    data = "164 at dhcp46-30.lab.eng.blr.redhat.com", '\000' <repeats 987 times>}}

>>>> As we can see above, up to the dht layer flock->l_start was '1' and flock->l_len was '1'.

(gdb) f 7
#7  0x00007f8e436f148c in ec_gf_lk (frame=<optimized out>, this=<optimized
out>, fd=<optimized out>, 
    cmd=<optimized out>, flock=<optimized out>, xdata=<optimized out>) at
ec.c:888
888        ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd,
(gdb) p *flock
value has been optimized out
(gdb) p fop
No symbol "fop" in current context.
(gdb) l
883                     int32_t cmd, struct gf_flock * flock, dict_t * xdata)
884    {
885        int32_t minimum = EC_MINIMUM_ALL;
886        if (flock->l_type == F_UNLCK)
887                minimum = EC_MINIMUM_ONE;
888        ec_lk(frame, this, -1, minimum, default_lk_cbk, NULL, fd, cmd,
889              flock, xdata);
890    
891        return 0;
892    }
(gdb) p *flock
value has been optimized out
(gdb) f 6
#6  0x00007f8e436f779b in __ec_manager (fop=0x7f8e380dc06c, error=0) at
ec-common.c:2287
2287            fop->state = fop->handler(fop, fop->state);
(gdb) p fop
$13 = (ec_fop_data_t *) 0x7f8e380dc06c
(gdb) p fop->flock
$14 = {l_type = 1, l_whence = 0, l_start = 0, l_len = 512, l_pid = 164, l_owner
= {len = 36, 
    data = "164 at dhcp46-30.lab.eng.blr.redhat.com", '\000' <repeats 987 times>}}
(gdb) 

>>> But in the EC layer, fop->flock.l_start got reset to '0' and fop->flock.l_len to 512, resulting in access being denied (EACCES) for the other process trying to take a lock in a non-conflicting range.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
You are the assignee for the bug.


More information about the Bugs mailing list