[Bugs] [Bug 1381970] GlusterFS Daemon stops working after a longer runtime and higher file workload due to design flaws ?

bugzilla at redhat.com bugzilla at redhat.com
Sun Feb 5 09:40:55 UTC 2017


https://bugzilla.redhat.com/show_bug.cgi?id=1381970



--- Comment #12 from Jules <jules at ispire.me> ---
We upgraded to the latest GlusterFS release, 3.9.1-1, overnight and found that
this makes the behavior even worse. It didn't take more than 8 hours until the
same behavior happened again.

Here is the output of the nfs.log:

Final graph:
+------------------------------------------------------------------------------+
  1: volume netshare-client-0
  2:     type protocol/client
  3:     option ping-timeout 5
  4:     option remote-host node2c
  5:     option remote-subvolume /storage/gfs/netshare
  6:     option transport-type socket
  7:     option username 5385f910-8b41-4c66-a1d3-f6d9c21a2733
  8:     option password 7a096f83-1017-4de4-a72d-24a1a61a2c8f
  9:     option send-gids true
 10: end-volume
 11:  
 12: volume netshare-client-1
 13:     type protocol/client
 14:     option ping-timeout 5
 15:     option remote-host node2d
 16:     option remote-subvolume /storage/gfs/netshare
 17:     option transport-type socket
 18:     option username 5385f910-8b41-4c66-a1d3-f6d9c21a2733
 19:     option password 7a096f83-1017-4de4-a72d-24a1a61a2c8f
 20:     option send-gids true
 21: end-volume
 22:  
 23: volume netshare-replicate-0
 24:     type cluster/replicate
 25:     option use-compound-fops off
 26:     subvolumes netshare-client-0 netshare-client-1
 27: end-volume
 28:  
 29: volume netshare-dht
 30:     type cluster/distribute
 31:     option min-free-disk 5%
 32:     option readdir-optimize on
 33:     option lock-migration off
 34:     subvolumes netshare-replicate-0
 35: end-volume
 36:  
 37: volume netshare-write-behind
 38:     type performance/write-behind
 39:     subvolumes netshare-dht
 40: end-volume
 41:  
 42: volume netshare
 43:     type debug/io-stats
 44:     option log-level WARNING
 45:     option latency-measurement off
 46:     option count-fop-hits off
 47:     subvolumes netshare-write-behind
 48: end-volume
 49:  
 50: volume netshare-dev-client-0
 51:     type protocol/client
 52:     option ping-timeout 5
 53:     option remote-host node2c
 54:     option remote-subvolume /storage/gfs/netshare-dev
 55:     option transport-type socket
 56:     option username edf1a0ce-db21-405d-a6f2-e96e1ecc629f
 57:     option password 8f27fe64-137e-4e62-8f03-1f6a69ad1d86
 58:     option send-gids true
 59: end-volume
 60:  
 61: volume netshare-dev-client-1
 62:     type protocol/client
 63:     option ping-timeout 5
 64:     option remote-host node2d
 65:     option remote-subvolume /storage/gfs/netshare-dev
 66:     option transport-type socket
 67:     option username edf1a0ce-db21-405d-a6f2-e96e1ecc629f
 68:     option password 8f27fe64-137e-4e62-8f03-1f6a69ad1d86
 69:     option send-gids true
 70: end-volume
 71:  
 72: volume netshare-dev-replicate-0
 73:     type cluster/replicate
 74:     option use-compound-fops off
 75:     subvolumes netshare-dev-client-0 netshare-dev-client-1
 76: end-volume
 77:  
 78: volume netshare-dev-dht
 79:     type cluster/distribute
 80:     option min-free-disk 5%
 81:     option readdir-optimize on
 82:     option lock-migration off
 83:     subvolumes netshare-dev-replicate-0
 84: end-volume
 85:  
 86: volume netshare-dev-write-behind
 87:     type performance/write-behind
 88:     subvolumes netshare-dev-dht
 89: end-volume
 90:  
 91: volume netshare-dev
 92:     type debug/io-stats
 93:     option log-level WARNING
 94:     option latency-measurement off
 95:     option count-fop-hits off
 96:     subvolumes netshare-dev-write-behind
 97: end-volume
 98:  
 99: volume nfs-server
100:     type nfs/server
101:     option rpc-auth.auth-glusterfs on
102:     option rpc-auth.auth-unix on
103:     option rpc-auth.auth-null on
104:     option rpc-auth.ports.insecure on
105:     option rpc-auth-allow-insecure on
106:     option transport-type socket
107:     option transport.socket.listen-port 2049
108:     option nfs.dynamic-volumes on
109:     option nfs.nlm on
110:     option nfs.drc off
111:     option rpc-auth.addr.netshare.allow *
112:     option nfs3.netshare.volume-id 6ddee9c9-bd06-43d0-8acf-405d6b759720
113:     option rpc-auth.addr.netshare-dev.allow *
114:     option nfs3.netshare-dev.volume-id 2e2967a8-9881-456f-bdc8-edc23f6a6ff1
115:     option nfs.gfs-vol0.disable on
116:     option nfs.netshare.disable off
117:     option nfs.netshare-dev.disable off
118:     subvolumes netshare netshare-dev
119: end-volume
120:  
+------------------------------------------------------------------------------+
[2017-02-05 08:57:30.276426] W [socket.c:588:__socket_rwv] 0-NLM-client: readv on 10.1.20.36:11205 failed (No data available)
[2017-02-05 08:59:47.746940] E [socket.c:2307:socket_connect_finish] 0-NLM-client: connection to 10.1.20.36:11205 failed (Connection timed out)
pending frames:
frame : type(0) op(0)
frame : type(0) op(0)
patchset: git://git.gluster.com/glusterfs.git
signal received: 11
time of crash: 
2017-02-05 08:59:49
configuration details:
argp 1
backtrace 1
dlfcn 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.9.1
/usr/lib/x86_64-linux-gnu/libglusterfs.so.0(_gf_msg_backtrace_nomem+0xac)[0x7fc411ee271c]
/usr/lib/x86_64-linux-gnu/libglusterfs.so.0(gf_print_trace+0x324)[0x7fc411eebd34]
/lib/x86_64-linux-gnu/libc.so.6(+0x350e0)[0x7fc41097d0e0]
/lib/x86_64-linux-gnu/libc.so.6(+0x91d8a)[0x7fc4109d9d8a]
/usr/lib/x86_64-linux-gnu/glusterfs/3.9.1/xlator/nfs/server.so(+0x3a352)[0x7fc40b1f3352]
/usr/lib/x86_64-linux-gnu/glusterfs/3.9.1/xlator/nfs/server.so(+0x3cc15)[0x7fc40b1f5c15]
/usr/lib/x86_64-linux-gnu/libgfrpc.so.0(rpc_clnt_notify+0x234)[0x7fc411cac4b4]
/usr/lib/x86_64-linux-gnu/libgfrpc.so.0(rpc_transport_notify+0x23)[0x7fc411ca87e3]
/usr/lib/x86_64-linux-gnu/glusterfs/3.9.1/rpc-transport/socket.so(+0x4b33)[0x7fc40cacab33]
/usr/lib/x86_64-linux-gnu/glusterfs/3.9.1/rpc-transport/socket.so(+0x8f07)[0x7fc40cacef07]
/usr/lib/x86_64-linux-gnu/libglusterfs.so.0(+0x7fc06)[0x7fc411f38c06]
/lib/x86_64-linux-gnu/libpthread.so.0(+0x8064)[0x7fc41115f064]
/lib/x86_64-linux-gnu/libc.so.6(clone+0x6d)[0x7fc410a3062d]
---------

Please fix that asap!

-- 
You are receiving this mail because:
You are on the CC list for the bug.
Unsubscribe from this bug https://bugzilla.redhat.com/token.cgi?t=mtbP1ochbZ&a=cc_unsubscribe


More information about the Bugs mailing list