[Gluster-users] Gluster client crashed in distributed - replicated setup

Vikas R vicky.ice at gmail.com
Thu Jan 13 11:23:16 UTC 2011


Hi


The setup was running under moderate load.

Attaching the crash stack:


mapped/multiple_reduce.flash_pl.0.1294901932.1.172.26.98.57.0.map.10
[2011-01-13 08:03:39.797369] C [rpc-clnt.c:430:rpc_clnt_fill_request_info]
rpc-clnt: cannot lookup the saved frame corresponding to xid (2120535) for
msg arrived on transport 172.26.98.56-1
[2011-01-13 08:03:39.797609] E [rpc-clnt.c:338:saved_frames_unwind]
(-->/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_notify+0xd8) [0xf7734e98]
(-->/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_connection_cleanup+0x87)
[0xf7734567]
(-->/usr/local/akamai/lib/libgfrpc.so.0(saved_frames_destroy+0x26)
[0xf77344c6]))) rpc-clnt: forced unwinding frame type(GlusterFS 3.1)
op(LK(26)) called at 2011-01-13 07:34:28.102695
[2011-01-13 08:03:39.797656] E [rpc-clnt.c:338:saved_frames_unwind]
(-->/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_notify+0xd8) [0xf7734e98]
(-->/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_connection_cleanup+0x87)
[0xf7734567]
(-->/usr/local/akamai/lib/libgfrpc.so.0(saved_frames_destroy+0x26)
[0xf77344c6]))) rpc-clnt: forced unwinding frame type(GlusterFS 3.1)
op(LK(26)) called at 2011-01-13 07:35:30.578677
[2011-01-13 08:03:39.797699] E [rpc-clnt.c:338:saved_frames_unwind]
(-->/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_notify+0xd8) [0xf7734e98]
(-->/usr/local/akamai/lib/libgfrpc.so.0(rpc_clnt_connection_cleanup+0x87)
[0xf7734567]
(-->/usr/local/akamai/lib/libgfrpc.so.0(saved_frames_destroy+0x26)
[0xf77344c6]))) rpc-clnt: forced unwinding frame type(GlusterFS 3.1)
op(LK(26)) called at 2011-01-13 08:03:39.797217
[2011-01-13 08:03:40.706095] W [fuse-bridge.c:570:fuse_fd_cbk]
glusterfs-fuse: 2377982: OPEN()
/streaming/set3/work/mapped/dpabort/multiple_reduce.flash_pl.1.1294901932.1.172.26.98.57.1.map.10
=> -1 (Input/output error)
[2011-01-13 08:03:47.796908] W [fuse-bridge.c:570:fuse_fd_cbk]
glusterfs-fuse: 2400049: OPEN()
/streaming/set3/work/reduce.15.1294902097.dplog => -1 (Input/output error)
[2011-01-13 08:03:50.346634] I
[client-handshake.c:699:select_server_supported_programs] 172.26.98.56-1:
Using Program GlusterFS-3.1.0, Num (1298437), Version (310)
[2011-01-13 08:03:50.346866] I [client-handshake.c:535:client_setvolume_cbk]
172.26.98.56-1: Connected to 172.26.98.56:6996, attached to remote volume
'brickex'.
[2011-01-13 08:04:21.538670] W [fuse-bridge.c:2765:fuse_setlk_cbk]
glusterfs-fuse: 2453376: ERR => -1 (Invalid argument)
[2011-01-13 08:04:24.212880] W [fuse-bridge.c:2765:fuse_setlk_cbk]
glusterfs-fuse: 2453612: ERR => -1 (Invalid argument)
[2011-01-13 08:04:33.382677] W [fuse-bridge.c:2765:fuse_setlk_cbk]
glusterfs-fuse: 2455241: ERR => -1 (Invalid argument)
[2011-01-13 08:04:36.99255] W [fuse-bridge.c:2765:fuse_setlk_cbk]
glusterfs-fuse: 2455478: ERR => -1 (Invalid argument)

Attaching my config files.


Thanks,

Vikas
-------------- next part --------------
## file auto generated by /usr/local/bin/glusterfs-volgen (export.vol)
# Cmd line:
# $ /usr/local/bin/glusterfs-volgen --name gfs 172.24.0.68:/ghostcache/home/hsawhney/gfs/ 172.24.0.222:/ghostcache/home/hsawhney/gfs/

volume posix1
  type storage/posix
  option directory /ghostcache/gfs-export/
end-volume

volume locks1
    type features/locks
    subvolumes posix1
end-volume

#volume quota1
#    type features/quota
#    #option disk-usage-limit 100MB
#    subvolumes locks1
#end-volume

volume brickex
    type performance/io-threads
    option thread-count 4
    subvolumes locks1
end-volume

volume server-tcp
    type protocol/server
    option transport-type tcp
    option auth.addr.brickex.allow *
    option transport.socket.listen-port 6996
    option transport.socket.nodelay on
    subvolumes brickex
end-volume
-------------- next part --------------
# file auto generated by /usr/local/bin/glusterfs-volgen (mount.vol)
# Cmd line:
# $ /usr/local/bin/glusterfs-volgen --name gfs 172.24.0.68:/ghostcache/home/hsawhney/gfs/ 172.24.0.222:/ghostcache/home/hsawhney/gfs/

# TRANSPORT-TYPE tcp
volume 172.26.98.55-1
    type protocol/client
    option transport-type tcp
    option remote-host 172.26.98.55
    option transport.socket.nodelay on
    option transport.remote-port 6996
    option remote-subvolume brickex
end-volume

volume 172.26.98.56-1
    type protocol/client
    option transport-type tcp
    option remote-host 172.26.98.56
    option transport.socket.nodelay on
    option transport.remote-port 6996
    option remote-subvolume brickex
end-volume

volume 172.26.98.57-1
    type protocol/client
    option transport-type tcp
    option remote-host 172.26.98.57
    option transport.socket.nodelay on
    option transport.remote-port 6996
    option remote-subvolume brickex
end-volume

volume 172.26.98.59-1
    type protocol/client
    option transport-type tcp
    option remote-host 172.26.98.59
    option transport.socket.nodelay on
    option transport.remote-port 6996
    option remote-subvolume brickex
end-volume

#volume 172.26.98.61-1
#    type protocol/client
#    option transport-type tcp
#    option remote-host 172.26.98.61
#    option transport.socket.nodelay on
#    option transport.remote-port 6996
#    option remote-subvolume brickex
#end-volume

#volume 172.26.98.62-1
#    type protocol/client
#    option remote-host 172.26.98.62
#    option transport.socket.nodelay on
#    option transport.remote-port 6996
#    option remote-subvolume brickex
#end-volume

volume distribute-1
    type cluster/dht
    subvolumes 172.26.98.55-1 172.26.98.56-1
end-volume

volume distribute-2
    type cluster/dht
    subvolumes 172.26.98.57-1 172.26.98.59-1
end-volume

#volume distribute-3
#    type cluster/dht
#    subvolumes 172.26.98.61-1 172.26.98.62-1
#end-volume

volume replicate-1
    type cluster/afr
    option lookup-unhashed yes
    subvolumes distribute-1 distribute-2
    #subvolumes distribute-1 distribute-2 distribute-3
end-volume

#volume stripe
#    type cluster/stripe
#    option block-size 1MB
#    subvolumes replicate-1 replicate-2 replicate-3
#end-volume

volume writebehind
    type performance/write-behind
    option cache-size 4MB
    subvolumes replicate-1
end-volume

volume io-cache
  type performance/io-cache
  option cache-size 64MB             # default is 32MB
  #option priority *.h:3,*.html:2,*:1 # default is '*:0'
  option cache-timeout 2             # default is 1 second
  subvolumes writebehind
end-volume

volume stat-prefetch
  type performance/stat-prefetch
  subvolumes io-cache
end-volume







More information about the Gluster-users mailing list