[Gluster-users] Afr crashing quite frequently - very unstable
Vikas R
vicky.ice at gmail.com
Fri Jan 14 07:12:46 UTC 2011
Hi,
I am unable to run the Gluster client in my setup. AFR with distribute crashes
randomly; it seems like 3.1.0 is very unstable.
Stack:
[2011-01-14 06:57:28.726872] E
[afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] replicate-1: checksum
on
/streaming/set11/out/multiple_reduce.flash_pl.2.1294988017.1.172.26.98.55.2.gz
failed on subvolume distribute-2 (File descriptor in bad state)
[2011-01-14 06:57:28.726901] E
[afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] replicate-1: checksum
on
/streaming/set11/out/multiple_reduce.flash_pl.2.1294988017.1.172.26.98.55.2.gz
failed on subvolume distribute-1 (File descriptor in bad state)
[2011-01-14 06:57:28.726913] E
[afr-self-heal-algorithm.c:956:sh_diff_loop_driver] replicate-1: diff
meta-data data self-heal aborting on
/streaming/set11/out/multiple_reduce.flash_pl.2.1294988017.1.172.26.98.55.2.gz
[2011-01-14 06:57:28.726962] E
[afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] replicate-1: checksum
on
/streaming/set11/out/multiple_reduce.flash_pl.2.1294988017.1.172.26.98.55.2.gz
failed on subvolume distribute-2 (File descriptor in bad state)
[2011-01-14 06:57:28.726974] E
[afr-self-heal-algorithm.c:762:sh_diff_checksum_cbk] replicate-1: checksum
on
/streaming/set11/out/multiple_reduce.flash_pl.2.1294988017.1.172.26.98.55.2.gz
failed on subvolume distribute-1 (File descriptor in bad state)
[2011-01-14 06:57:28.726984] E
[afr-self-heal-algorithm.c:956:sh_diff_loop_driver] replicate-1: diff
meta-data data self-heal aborting on
/streaming/set11/out/multiple_reduce.flash_pl.2.1294988017.1.172.26.98.55.2.gz
[2011-01-14 06:57:28.727192] E [mem-pool.c:264:__gf_free]
(-->/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so [0xf613349d]
(-->/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so [0xf613181f]
(-->/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so
[0xf61316a6]))) : Assertion failed: 0
pending frames:
frame : type(1) op(LOOKUP)
frame : type(1) op(LOOKUP)
patchset: v3.1.0
signal received: 11
time of crash: 2011-01-14 06:57:28
configuration details:
argp 1
backtrace 1
dlfcn 1
fdatasync 1
libpthread 1
llistxattr 1
setfsid 1
spinlock 1
epoll.h 1
xattr.h 1
st_atim.tv_nsec 1
package-string: glusterfs 3.1.0
[0xffffe400]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so[0xf61316a6]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so[0xf613181f]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so[0xf613349d]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so[0xf613387d]
/usr/local/akamai/lib/libglusterfs.so.0(default_rchecksum_cbk+0x79)[0xf76f3979]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/protocol/client.so(client3_1_rchecksum+0x1b4)[0xf6181594]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/protocol/client.so(client_rchecksum+0x93)[0xf617c033]
/usr/local/akamai/lib/libglusterfs.so.0(default_rchecksum+0xd9)[0xf76edcb9]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so[0xf6131fbd]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so(afr_sh_algo_diff+0x14d)[0xf613248d]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so(afr_sh_data_sync_prepare+0x11a)[0xf61225fa]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so(afr_sh_data_fix+0x29c)[0xf612298c]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/afr.so(afr_sh_data_fstat_cbk+0xf8)[0xf6122ce8]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/cluster/dht.so(dht_attr_cbk+0xff)[0xf615d7cf]
/usr/local/akamai/lib/glusterfs/3.1.0/xlator/protocol/client.so(client3_1_fstat_cbk+0x331)[0xf6190bb1]
/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_handle_reply+0xc2)[0xf76cdc42]
/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_notify+0xa2)[0xf76cde62]
/usr/local/akamai/lib/libgfrpc.so.0(rpc_transport_notify+0x35)[0xf76c84c5]
/usr/local/akamai/lib/glusterfs/3.1.0/rpc-transport/socket.so(socket_event_poll_in+0x50)[0xf5e9f500]
/usr/local/akamai/lib/glusterfs/3.1.0/rpc-transport/socket.so(socket_event_handler+0x15b)[0xf5e9f67b]
/usr/local/akamai/lib/libglusterfs.so.0[0xf7708cff]
/usr/local/akamai/lib/libglusterfs.so.0(event_dispatch+0x21)[0xf7707a21]
glusterfsc(main+0x48c)[0x804c45c]
/lib/tls/i686/cmov/libc.so.6(__libc_start_main+0xdc)[0xf756e18c]
glusterfsc[0x804a631]
Attaching config files.
Thanks,
Vikas
-------------- next part --------------
## file auto generated by /usr/local/bin/glusterfs-volgen (export.vol)
# Cmd line:
# $ /usr/local/bin/glusterfs-volgen --name gfs 172.24.0.68:/ghostcache/home/hsawhney/gfs/ 172.24.0.222:/ghostcache/home/hsawhney/gfs/
volume posix1
type storage/posix
option directory /ghostcache/gfs-export/
end-volume
volume locks1
type features/locks
subvolumes posix1
end-volume
#volume quota1
# type features/quota
# #option disk-usage-limit 100MB
# subvolumes locks1
#end-volume
volume brickex
type performance/io-threads
option thread-count 4
subvolumes locks1
end-volume
volume server-tcp
type protocol/server
option transport-type tcp
option auth.addr.brickex.allow *
option transport.socket.listen-port 6996
option transport.socket.nodelay on
subvolumes brickex
end-volume
-------------- next part --------------
# file auto generated by /usr/local/bin/glusterfs-volgen (mount.vol)
# Cmd line:
# $ /usr/local/bin/glusterfs-volgen --name gfs 172.24.0.68:/ghostcache/home/hsawhney/gfs/ 172.24.0.222:/ghostcache/home/hsawhney/gfs/
# TRANSPORT-TYPE tcp
volume 172.26.98.55-1
type protocol/client
option transport-type tcp
option remote-host 172.26.98.55
option transport.socket.nodelay on
option transport.remote-port 6996
option remote-subvolume brickex
end-volume
volume 172.26.98.56-1
type protocol/client
option transport-type tcp
option remote-host 172.26.98.56
option transport.socket.nodelay on
option transport.remote-port 6996
option remote-subvolume brickex
end-volume
volume 172.26.98.57-1
type protocol/client
option transport-type tcp
option remote-host 172.26.98.57
option transport.socket.nodelay on
option transport.remote-port 6996
option remote-subvolume brickex
end-volume
volume 172.26.98.59-1
type protocol/client
option transport-type tcp
option remote-host 172.26.98.59
option transport.socket.nodelay on
option transport.remote-port 6996
option remote-subvolume brickex
end-volume
#volume 172.26.98.61-1
# type protocol/client
# option transport-type tcp
# option remote-host 172.26.98.61
# option transport.socket.nodelay on
# option transport.remote-port 6996
# option remote-subvolume brickex
#end-volume
#volume 172.26.98.62-1
# type protocol/client
# option remote-host 172.26.98.62
# option transport.socket.nodelay on
# option transport.remote-port 6996
# option remote-subvolume brickex
#end-volume
volume distribute-1
type cluster/dht
subvolumes 172.26.98.55-1 172.26.98.56-1
end-volume
volume distribute-2
type cluster/dht
subvolumes 172.26.98.57-1 172.26.98.59-1
end-volume
#volume distribute-3
# type cluster/dht
# subvolumes 172.26.98.61-1 172.26.98.62-1
#end-volume
volume replicate-1
type cluster/afr
option lookup-unhashed yes
subvolumes distribute-1 distribute-2
#subvolumes distribute-1 distribute-2 distribute-3
end-volume
#volume stripe
# type cluster/stripe
# option block-size 1MB
# subvolumes replicate-1 replicate-2 replicate-3
#end-volume
volume writebehind
type performance/write-behind
option cache-size 4MB
subvolumes replicate-1
end-volume
volume io-cache
type performance/io-cache
option cache-size 64MB # default is 32MB
#option priority *.h:3,*.html:2,*:1 # default is '*:0'
option cache-timeout 2 # default is 1 second
subvolumes writebehind
end-volume
volume stat-prefetch
type performance/stat-prefetch
subvolumes io-cache
end-volume
More information about the Gluster-users
mailing list